In [13]:
# Number of chunks produced by the text splitter.
# `splitted_text` is created in the document-loading cell, which must run first.
len(splitted_text)
# To inspect a single chunk, evaluate e.g. splitted_text[0]


9

In [3]:
import os

from langchain.document_loaders import TextLoader
from langchain_ollama import OllamaEmbeddings
from langchain_postgres import PGVector
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load the source document and split it into small, slightly overlapping chunks
text = TextLoader("./test.txt").load()
splitter = RecursiveCharacterTextSplitter(chunk_size=150, chunk_overlap=10)
splitted_text = splitter.split_documents(text)

# Embed each chunk and insert it into the vector store.
# The connection string carries credentials, so it can be supplied through the
# PGVECTOR_CONNECTION environment variable; the fallback is the local
# development database this notebook was written against.
embedding_model = OllamaEmbeddings(model="llama3.1")
connection = os.environ.get(
    "PGVECTOR_CONNECTION",
    "postgresql+psycopg://langchain:langchain@localhost:6024/langchain",
)
db = PGVector.from_documents(splitted_text, embedding=embedding_model, connection=connection)


In [64]:
# Build a retriever that returns a single chunk, using the "mmr" search type
retriever = db.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 1},
)

# Retrieve the chunk for a sample question and display it as the cell output
query_user = retriever.invoke("what is the material weighs?")
query_user

[Document(id='c4ca181a-ae8c-4408-94d5-d7b95de37b5f', metadata={'source': './test.txt'}, page_content='than that with the double-sided tape while providing other benefits.')]

In [66]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama

retriever = db.as_retriever()

prompt = ChatPromptTemplate.from_template("""Answer the question based on only the following context:
                                          {context}
                                          Question: {question}
                                          """)
llm = ChatOllama(model="llama3.1", temperature=0)

# Named `qa_chain` (not `chain`) so this pipeline cannot shadow the `chain`
# decorator that other cells import from langchain_core.runnables.
qa_chain = prompt | llm

# Fetch relevant documents. Retrievers are Runnables, so `invoke` is used
# instead of the deprecated `get_relevant_documents`.
docs = retriever.invoke("what is the average intersession word accuracy?")

# Run the prompt + model pipeline on the retrieved context
qa_chain.invoke({"context": docs, "question": "what is the average intersession word accuracy"})

AIMessage(content='76.50%', additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2025-08-27T10:41:51.121697548Z', 'done': True, 'done_reason': 'stop', 'total_duration': 43107632741, 'load_duration': 48364827, 'prompt_eval_count': 295, 'prompt_eval_duration': 41844492861, 'eval_count': 5, 'eval_duration': 1214123684, 'model_name': 'llama3.1'}, id='run--8a38235a-c35e-4c65-bbfd-0a433dbaa03b-0', usage_metadata={'input_tokens': 295, 'output_tokens': 5, 'total_tokens': 300})

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama
from langchain_core.runnables import chain

# Retriever over the vector store, created with default settings
retriever = db.as_retriever()

# Prompt that restricts the answer to the supplied context
prompt = ChatPromptTemplate.from_template("""Answer the question based on only the following context:
                                          {context}
                                          Question: {question}
                                          """)
# Chat model with temperature 0
llm = ChatOllama(model="llama3.1", temperature=0)

@chain
def qa(input):
    """Answer a question from documents retrieved for that same question.

    Args:
        input: The user's question.

    Returns:
        The message returned by `llm` for the filled-in prompt.
    """
    # Fetch relevant docs; `invoke` is the Runnable entry point that replaces
    # the deprecated `get_relevant_documents`.
    docs = retriever.invoke(input)
    # Fill the prompt with the retrieved context and the question
    formatted = prompt.invoke({"context": docs, "question": input})
    # Generate the answer
    return llm.invoke(formatted)
# Run the chain on a sample question; the returned message is the cell output
qa.invoke(input="what is the average intersession word accuracy")

AIMessage(content='76.50% and 68.18%.', additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2025-08-27T13:22:59.28521524Z', 'done': True, 'done_reason': 'stop', 'total_duration': 47126228057, 'load_duration': 48121590, 'prompt_eval_count': 309, 'prompt_eval_duration': 44248532861, 'eval_count': 11, 'eval_duration': 2828484865, 'model_name': 'llama3.1'}, id='run--8334bd70-1d50-4c47-a738-c8b8528482d4-0', usage_metadata={'input_tokens': 309, 'output_tokens': 11, 'total_tokens': 320})

In [73]:
rewrite_prompt = ChatPromptTemplate.from_template("""Provide a better search query without additional info .
                                                   end the queries with '**'.question:{x} Answer:""")
def parse_rewriter_output(message):
    """Extract the rewritten search query from the model's reply.

    The rewrite prompt asks the model to end its query with '**', and the
    reply may also be wrapped in double quotes or padded with whitespace.
    All of those characters are removed from both ends in a single pass, so
    any ordering (e.g. '"query"**' or 'query**' followed by a newline) is
    cleaned up.

    Args:
        message: Object whose `content` attribute holds the reply text.

    Returns:
        The reply text without surrounding whitespace, '"' or '*' characters.
    """
    return message.content.strip(' \t\r\n"*')
# Pipeline: rewrite prompt -> model -> cleaned-up query string
rewriter = rewrite_prompt | llm | parse_rewriter_output

@chain
def qa_rrr(input):
    """Rewrite the query, retrieve with the rewritten query, then answer.

    Args:
        input: The raw user input, which may contain text unrelated to the
            actual question.

    Returns:
        The message returned by `llm` for the filled-in prompt.
    """
    # Rewrite the raw input into a search query
    new_query = rewriter.invoke(input)
    # Fetch relevant docs for the rewritten query; `invoke` replaces the
    # deprecated `get_relevant_documents`.
    docs = retriever.invoke(new_query)
    # The prompt still receives the original input as the question
    formatted = prompt.invoke({"context": docs, "question": input})
    # Generate the answer
    return llm.invoke(formatted)

# Run with a question preceded by unrelated text, so the rewrite step has something to remove
qa_rrr.invoke(input="Today I woke up and brushed my teeth, then I sat down to read the news. But then I forgot the food on the cooker.what is the average intersession word accuracy")

AIMessage(content="I don't have information about your activities or the news you were reading. However, according to the provided context, an average intersession word accuracy of 76.50% was obtained using a bidirectional long short-term memory network for classification.", additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2025-08-27T17:17:25.821047598Z', 'done': True, 'done_reason': 'stop', 'total_duration': 68874507486, 'load_duration': 52761491, 'prompt_eval_count': 322, 'prompt_eval_duration': 54213159690, 'eval_count': 50, 'eval_duration': 14608016512, 'model_name': 'llama3.1'}, id='run--00e1cabf-2a90-420a-aff0-1fb668dc0165-0', usage_metadata={'input_tokens': 322, 'output_tokens': 50, 'total_tokens': 372})

In [6]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama
from langchain_core.runnables import chain

# Prompt asking the model for five rephrasings of the user question, separated by newlines
perspectives_prompt = ChatPromptTemplate.from_template(
    """You are an AI language model assistant. Your task is to generate five different versions of the given user question to retrieve relevant documents from a vector database. 
    By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based  similarity search. 
    Provide these alternative questions separated by newlines. 
    Original question: {question}""")

# Chat model used in this cell for both query generation and answering
llm = ChatOllama(model="llama3.1")

def parse_queries_output(message):
    """Split the model's reply into one search query per line.

    Blank and whitespace-only lines are dropped and each query is trimmed, so
    empty strings are never passed on to the retriever.

    Args:
        message: Object whose `content` attribute holds the reply text.

    Returns:
        A list of non-empty query strings, in the order they appeared.
    """
    return [line.strip() for line in message.content.splitlines() if line.strip()]

# Pipeline: perspectives prompt -> model -> list of alternative queries
query_gen = perspectives_prompt | llm | parse_queries_output

def get_unique_union(document_lists):
    """Merge several lists of documents into one list without repeated content.

    Documents are identified by their `page_content`. When the same content
    appears more than once, the last object seen is the one returned, at the
    position where that content first appeared.

    Args:
        document_lists: Iterable of document lists.

    Returns:
        A flat list with one document per distinct `page_content`.
    """
    unique_by_content = {}
    for doc_list in document_lists:
        for doc in doc_list:
            unique_by_content[doc.page_content] = doc
    return list(unique_by_content.values())

# Retriever over the vector store, created with default settings
retriever = db.as_retriever()

# Generate query variants, retrieve for all of them in one batch, then merge and dedupe the results
retrieval_chain = query_gen | retriever.batch | get_unique_union

# Prompt used to answer the question from the merged context
prompt = ChatPromptTemplate.from_template("""Answer the following question based on this context:
                                          {context}
                                          Question: {question}
                                          """)
@chain
def multi_query_qa(input):
    """Answer a question from the documents returned by `retrieval_chain`.

    Args:
        input: The user's question.

    Returns:
        The message returned by `llm` for the filled-in prompt.
    """
    # Retrieve the context documents for the question
    context_docs = retrieval_chain.invoke(input)
    # Fill the answering prompt with the context and the question
    filled_prompt = prompt.invoke({"context": context_docs, "question": input})
    # Generate the answer
    return llm.invoke(filled_prompt)
# Run the multi-query chain on a sample question; the returned message is the cell output
multi_query_qa.invoke("""what is the average intersession word accuracy?""")

AIMessage(content='76.50% for classification, and 68.18% in general.', additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2025-08-28T18:42:00.579047422Z', 'done': True, 'done_reason': 'stop', 'total_duration': 47412686188, 'load_duration': 46268909, 'prompt_eval_count': 291, 'prompt_eval_duration': 42806711645, 'eval_count': 17, 'eval_duration': 4559208152, 'model_name': 'llama3.1'}, id='run--ba8be1d2-fb97-4fc8-bca9-ddef70ad0e0e-0', usage_metadata={'input_tokens': 291, 'output_tokens': 17, 'total_tokens': 308})

In [10]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama

prompt_rag_fusion = ChatPromptTemplate.from_template("""You are a helpful assistant that generates multiple search queries based on a single input query.
                                                      \n Generate multiple search queries related to: {question} 
                                                     \n Output (4 queries):""")
def parse_queries_output(message):
    """Split the model's reply into one search query per line.

    Blank and whitespace-only lines are dropped and each query is trimmed, so
    empty strings are never passed on to the retriever.

    Args:
        message: Object whose `content` attribute holds the reply text.

    Returns:
        A list of non-empty query strings, in the order they appeared.
    """
    return [line.strip() for line in message.content.splitlines() if line.strip()]

# Chat model for this cell (no temperature override is passed)
llm = ChatOllama(model='llama3.1')

# Pipeline: RAG-fusion prompt -> model -> list of generated search queries
query_gen = prompt_rag_fusion | llm | parse_queries_output


def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists into a single ranking.

    Each time a document appears at zero-based position `rank` in one of the
    lists, `1 / (rank + k)` is added to its score. Documents are identified
    by their `page_content`; the first object seen for a given content is the
    one that is returned.

    Args:
        results: Ranked lists of documents, one list per query.
        k: Constant added to the rank in the score formula.

    Returns:
        One document per distinct content, ordered by descending fused score.
    """
    score_by_content = {}
    first_doc_by_content = {}
    for ranked_docs in results:
        for position, doc in enumerate(ranked_docs):
            content = doc.page_content
            # Remember only the first document object seen for each content
            first_doc_by_content.setdefault(content, doc)
            score_by_content[content] = score_by_content.get(content, 0) + 1 / (position + k)
    # Highest fused score first
    ranked_contents = sorted(score_by_content, key=score_by_content.get, reverse=True)
    return [first_doc_by_content[content] for content in ranked_contents]


# Generate queries, retrieve for all of them in one batch, then fuse the ranked lists.
# NOTE(review): `retriever` is not defined in this cell; it relies on an earlier cell having run.
retrieval_chain = query_gen | retriever.batch | reciprocal_rank_fusion


# Prompt used to answer the question from the fused context
prompt = ChatPromptTemplate.from_template(
    """Answer the question based only on the following context: {context} Question: {question} """
)

@chain
def rag_fusion(input):
    """Answer a question from the documents returned by `retrieval_chain`.

    Args:
        input: The user's question.

    Returns:
        The message returned by `llm` for the filled-in prompt.
    """
    # Fetch relevant documents
    fused_docs = retrieval_chain.invoke(input)
    # Format the prompt
    filled_prompt = prompt.invoke({"context": fused_docs, "question": input})
    # Generate the answer
    return llm.invoke(filled_prompt)

# Run the RAG-fusion chain defined in this cell
rag_fusion.invoke("what is the average intersession word accuracy?")

AIMessage(content='76.50% (for classification) and 68.18%', additional_kwargs={}, response_metadata={'model': 'llama3.1', 'created_at': '2025-08-28T19:10:17.750903374Z', 'done': True, 'done_reason': 'stop', 'total_duration': 56086524028, 'load_duration': 56602135, 'prompt_eval_count': 355, 'prompt_eval_duration': 51970466952, 'eval_count': 15, 'eval_duration': 4058949872, 'model_name': 'llama3.1'}, id='run--46ce5aa5-12dc-45b9-824c-3ca194f2b3e4-0', usage_metadata={'input_tokens': 355, 'output_tokens': 15, 'total_tokens': 370})

In [12]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser

prompt_hyde = ChatPromptTemplate.from_template("""Please write a passage to answer the question.\n
                                                Question: {question}\n
                                                Passage:""")

# Create the model first so the HyDE generator below uses this cell's
# temperature-0 model rather than whatever `llm` an earlier cell left behind.
llm = ChatOllama(model="llama3.1", temperature=0)

# HyDE: have the model write a hypothetical passage for the question, then
# feed that passage to the retriever as the search text.
generate_doc = prompt_hyde | llm | StrOutputParser()
retrieval_chain = generate_doc | retriever

# Prompt used to answer the question from the retrieved context
prompt = ChatPromptTemplate.from_template(
    """Answer the question based only on the following context: {context} Question: {question} """
)

@chain
def qa(input):
    """Answer a question from documents retrieved by the HyDE retrieval chain.

    Args:
        input: The user's question.

    Returns:
        The message returned by `llm` for the filled-in prompt.
    """
    # Retrieve documents through the HyDE retrieval chain
    hyde_docs = retrieval_chain.invoke(input)
    # Fill the answering prompt with the context and the question
    filled_prompt = prompt.invoke({"context": hyde_docs, "question": input})
    # Generate the answer
    return llm.invoke(filled_prompt)

# Question to answer with the HyDE-based chain
query = "what is the average intersession word accuracy?"

# Announce the run, invoke the chain, then print only the text of the answer
print("Running hyde\n")
result = qa.invoke(query)
print("\n\n")
print(result.content)

Running hyde




According to the provided context, the average intersession word accuracy is 72.34%. This value is not explicitly stated in the context, but it can be calculated by taking the average of the two values mentioned for each pair of documents with similar content: 

76.50% (from Document '7d367161-eda2-42b9-a487-e4634ef0f9a0') and 68.18% (also from Document '7d367161-eda2-42b9-a487-e4634ef0f9a0').
