In [2]:
import argparse

from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma #Import Chroma to generate and store new embeddings
from langchain.embeddings import OllamaEmbeddings 

from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler


# Create the command-line parser; no arguments are registered on it in this cell.
# NOTE(review): the recorded output of this cell shows a `--url` add_argument
# call that is not in the code below — the cell looks edited since its last run.
parser = argparse.ArgumentParser(
    description='Filter out URL argument.',
)

_StoreAction(option_strings=['--url'], dest='url', nargs=None, const=None, default='https://news.google.co.uk/', type=<class 'str'>, choices=None, required=True, help='The URL to filter out.', metavar=None)

In [15]:
import argparse
import sys

# Page used when the code runs inside a Jupyter kernel, where sys.argv holds
# the kernel launcher's arguments rather than a --url option for this code.
NOTEBOOK_DEFAULT_URL = 'https://python.langchain.com/docs/integrations/text_embedding/ollama/'


def resolve_url(argv=None):
    """Return the URL of the page to load.

    Args:
        argv: Argument vector to inspect, program name first. Defaults to
            ``sys.argv`` when omitted.

    Returns:
        ``NOTEBOOK_DEFAULT_URL`` when the program name contains
        "ipykernel_launcher"; otherwise the value of the required ``--url``
        command-line option.

    Raises:
        SystemExit: Raised by argparse when ``--url`` is missing or the
            remaining arguments cannot be parsed.
    """
    if argv is None:
        argv = sys.argv
    # Guard against an empty argument vector instead of indexing blindly.
    program = argv[0] if argv else ""
    if "ipykernel_launcher" in program:  # Running in a Jupyter Notebook
        return NOTEBOOK_DEFAULT_URL

    parser = argparse.ArgumentParser(description='Filter out URL argument.')
    parser.add_argument('--url', type=str, required=True, help='The URL to filter out.')
    return parser.parse_args(argv[1:]).url


if __name__ == "__main__":
    url = resolve_url()
    print(f"URL: {url}")


# Point a web loader at the selected URL, then load it into `data`
# (later cells split and count `data`).
loader = WebBaseLoader(web_path=url)
data = loader.load()

URL: https://python.langchain.com/docs/integrations/text_embedding/ollama/


In [17]:
# Split into chunks
# Splitter settings: chunk_size=1500 with chunk_overlap=100 between chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=100,
)
all_splits = text_splitter.split_documents(data)
print(f"Split into {len(all_splits)} chunks")

# Build the Chroma vector store from the chunks, using Ollama "phi3" embeddings.
embedding_model = OllamaEmbeddings(model="phi3")
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embedding_model,
)

Split into 16 chunks


In [18]:
print(f"Loaded {len(data)} documents")

# RAG prompt
# Pull the prompt registered under "rlm/rag-prompt-llama" via the hub module.
from langchain import hub
QA_CHAIN_PROMPT = hub.pull("rlm/rag-prompt-llama")


# LLM
# The streaming handler is passed through `callbacks`: LangChain deprecates the
# `callback_manager` keyword on language models in favour of `callbacks`.
llm = Ollama(
    model="phi3",
    verbose=True,
    callbacks=[StreamingStdOutCallbackHandler()],  # writes tokens to stdout as they arrive
)
print(f"Loaded LLM model {llm.model}")

Loaded 1 documents




Loaded LLM model phi3


  llm = Ollama(model="phi3",


In [19]:
# QA chain
from langchain.chains import RetrievalQA

# Combine the LLM, a retriever over the vector store, and the RAG prompt
# into a single retrieval question-answering chain.
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

In [20]:
# Ask the chain to summarize the page; the question goes in under the "query" key.
question = f"Summarize the content on {url}?"
chain_inputs = {"query": question}
result = qa_chain(chain_inputs)

To embed multiple texts using Ollama, first import the necessary libraries and load your documents. Then use `embedd0m_graph` to generate text vectors for each document by iterating over them with a loop or list comprehension, printing out the results as needed:

```python
from langchain.embedders import embedd0m_graph  # Import Ollama's embedding model from LangChain library
text1 = "LangGraph is a library for building stateful, multi-actor applications with LLMs"
text2 = (    "OllamaEmbeddings features and configuration options can be found in the API reference." )  # Example second text
embedder = embedd0m_graph()  # Initialize Ollama's embedding model from LangChain library
two_vectors = [embedder.encode(text).tolist() for text in (text1, text2)]  # Embed and store vectors of both texts
for vector in two_vectors:
    print(str(vector)[:100])  # Display the first 100 characters representing each embedded document's vector.
```