In [None]:
from langchain_chroma import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.llms import Ollama
from langchain_community.document_loaders import TextLoader

from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough


# Load documents

Load the documents to run question answering over. If you want to use your own documents, this is the section to replace.



In [None]:
# Load the text file that the rest of the notebook answers questions about.
loader = TextLoader(file_path='state_of_the_union.txt')
documents = loader.load()

# Split documents

Split documents into small chunks. This is so we can find the most relevant chunks for a query and pass only those into the LLM.

In [None]:
# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=100)
# so that only the relevant pieces need to be handed to the LLM later.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
texts = text_splitter.split_documents(documents)

In [None]:
# Show how many chunks the splitter produced.
len(texts)

# Initialize ChromaDB

Create embeddings for each chunk and insert into the Chroma vector database.


In [None]:
# Embedding model (served by Ollama) used to vectorise every chunk.
embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Embed the chunks and store them in a Chroma collection persisted under
# ./chroma_db.
# Deterministic ids keep this cell safe to re-run: the chunks are written under
# the same ids again instead of being appended under fresh random ids, which
# would fill the persisted collection with duplicates and make later searches
# return the same chunk several times.
# NOTE(review): entries left in ./chroma_db by earlier runs that did not pass
# ids are not removed by this; delete the directory once to start clean.
vectordb = Chroma.from_documents(
    texts,
    embeddings,
    ids=[f"chunk-{index}" for index in range(len(texts))],
    persist_directory="./chroma_db",
)


In [None]:
# Sanity-check retrieval: fetch the chunks most similar to a sample question.
docs = vectordb.similarity_search(
    query="What did the president say about Ketanji Brown Jackson",
)

In [None]:
# Show how many chunks the similarity search returned.
len(docs)

# RAG

In [None]:
# LLM prompt template
# Instructs the model to answer from the retrieved context and to admit when
# it does not know. Placeholders: {question} and {context}.
template = """You are an assistant for specific knowledge query tasks. 
   Use the following pieces of retrieved context to answer the question. 
   If you don't know the answer, just say that you don't know. 
   Question: {question} 
   Context: {context} 
   Answer:
   """

# RAG prompt
# Shorter alternative that restricts the answer to the supplied context only.
# Same placeholders ({context}, {question}). It is not referenced by the other
# cells visible in this section.
template1 = """Answer the question based only on the following context:
{context}
Question: {question}
"""

In [None]:
# Chat model served through Ollama. ChatOllama accepts many more optional
# parameters; inspect the class (e.g. hover over `ChatOllama(...)` in your
# editor) to see the currently supported ones.
llm = ChatOllama(model="phi3")

# Chat prompt built from the `template` string; it expects the "question" and
# "context" variables.
prompt = ChatPromptTemplate.from_template(template=template)


In [None]:
# Retriever that returns the 3 most similar chunks for a query.
# The result count has to be passed through `search_kwargs`; a bare `k=3`
# keyword is not a search setting and leaves the default number of results in
# effect.
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

In [None]:
def format_docs(docs):
    """Merge the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The page contents joined together, with one blank line between
        consecutive documents (an empty string when ``docs`` is empty).
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

In [None]:
# RAG chain that returns only the answer text.
# The incoming question is fanned out to two branches: "context" runs the
# retriever and merges the returned documents with format_docs, "question"
# passes the input through unchanged. The resulting dict fills the prompt,
# which is sent to the LLM, and the reply is parsed into a plain string.
chain = (
    RunnableParallel(
        context=retriever | format_docs,
        question=RunnablePassthrough(),
    )
    | prompt
    | llm
    | StrOutputParser()
)

See the link below for how to return the source documents together with the answer:
https://python.langchain.com/v0.1/docs/use_cases/question_answering/sources/

In [None]:
# Answer sub-chain: takes a dict whose "context" entry holds the retrieved
# documents, replaces that entry with the merged text from format_docs, then
# runs prompt -> LLM -> string parser.
rag_chain_from_docs = (
    RunnablePassthrough.assign(
        context=lambda inputs: format_docs(inputs["context"]),
    )
    | prompt
    | llm
    | StrOutputParser()
)

# Full chain that also exposes the sources: the retriever output is kept under
# "context", the original input under "question", and the sub-chain's result is
# added under "answer". This rebinds `chain`, replacing the answer-only chain
# defined in the previous code cell.
chain = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
).assign(answer=rag_chain_from_docs)

# Ask

In [None]:

# Run the chain on a sample question. With the chain built in the cell above,
# the result should be a dict with "context", "question" and "answer" entries.
res = chain.invoke("how much will intel will invest?")

In [None]:
# Display the chain result.
res