In [9]:
# Load the dataset from Huggingface using datasets
from datasets import load_dataset
ds = load_dataset("jamescalam/ai-arxiv-chunked", split="train")
ds

Dataset({
    features: ['doi', 'chunk-id', 'chunk', 'id', 'title', 'summary', 'source', 'authors', 'categories', 'comment', 'journal_ref', 'primary_category', 'published', 'updated', 'references'],
    num_rows: 41584
})

In [24]:
# build the document with metadata to be pushed to Vector index
from langchain.docstore.document import Document
docs =[]

for row in ds:
   doc = Document(
       page_content=row['chunk'],
       metadata={
           'source': row['source'],
           'title': row['title'],
           'text': row['chunk'],
           'chunk-id':row['chunk-id'],
           'id':row['id']
         })
   docs.append(doc)
    

## Embedding and Vector Store Setup

In [27]:
import os
from getpass import getpass
from langchain.embeddings.openai import OpenAIEmbeddings
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

model_name ="text-embedding-ada-002"

embeddings = OpenAIEmbeddings(model=model_name,disallowed_special=())

In [19]:
from pinecone import Pinecone
api_key = os.environ['PINECONE_API_KEY']
pc = Pinecone(api_key=api_key)

In [20]:
from pinecone import ServerlessSpec
spec = ServerlessSpec(cloud="aws", region="us-east-1")

In [28]:
# create the Index
import time
index_name ="langchain-multi-query-demo"
existing_indexes =[ index_info["name"] for index_info in pc.list_indexes()]

#chck if the index exists
if index_name not in existing_indexes:
    print(f"Index '{index_name}' not already exists. creating now")
    pc.create_index(index_name,
                    dimension=1536,
                    metric ="dotproduct",
                    spec =spec
                    )
    #wait for index to be created
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

    # connect to index
    index = pc.Index(index_name)
    time.sleep(1)
    #view index status
    index.describe_index_stats()

Index 'langchain-multi-query-demo' not already exists. creating now


In [25]:
len(docs)

41584

In [29]:
# create batch and upload the data to pinecone index

from tqdm.auto import tqdm
batch_size = 200

for i in tqdm(range(0,len(docs),batch_size)):
    i_end =min(len(docs), i+batch_size)
    docs_batch = docs[i:i_end]
    #get the ids
    ids =[f"{doc.metadata['id']} - {doc.metadata['chunk-id']}" for doc in docs_batch]
    # get text and embeddings
    texts = [doc.page_content for doc in docs_batch]
    embeds =embeddings.embed_documents(texts)
    #get metadata
    metadata = [doc.metadata for doc in docs_batch]
    to_upsert = zip(ids, embeds, metadata)
    index.upsert(vectors=to_upsert)

100%|██████████| 208/208 [25:17<00:00,  7.30s/it]


In [36]:
from langchain.vectorstores import Pinecone
text_field = "text"
vectorstore = Pinecone(index, embeddings.embed_query, text_field)



In [37]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.0)

In [38]:
from langchain.retrievers.multi_query import MultiQueryRetriever
retriever = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever= vectorstore.as_retriever()
)

In [59]:
import logging
logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

In [42]:
question =" Tell me about llama2?"
docs = retriever.get_relevant_documents(question)
len(docs)

 2024-08-02 16:28:52,775 - DEBUG- Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': 'You are an AI language model assistant. Your task is \n    to generate 3 different versions of the given user \n    question to retrieve relevant documents from a vector  database. \n    By generating multiple perspectives on the user question, \n    your goal is to help the user overcome some of the limitations \n    of distance-based similarity search. Provide these alternative \n    questions separated by newlines. Original question:  Tell me about llama2?', 'role': 'user'}], 'model': 'gpt-3.5-turbo', 'logprobs': False, 'n': 1, 'stream': False, 'temperature': 0.0}}
 2024-08-02 16:28:52,778 - DEBUG- Sending HTTP Request: POST https://api.openai.com/v1/chat/completions
 2024-08-02 16:28:52,780 - DEBUG- close.started
 2024-08-02 16:28:52,783 - DEBUG- close.complete
 2024-08-02 16:28:52,784 - DEBUG- connect_tcp.started host='api.openai.

 2024-08-02 16:28:52,956 - DEBUG- connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x0000023C4CCB5700>
 2024-08-02 16:28:52,958 - DEBUG- start_tls.started ssl_context=<ssl.SSLContext object at 0x0000023C639218D0> server_hostname='api.openai.com' timeout=None
 2024-08-02 16:28:53,094 - DEBUG- start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x0000023C4CCB5B20>
 2024-08-02 16:28:53,095 - DEBUG- send_request_headers.started request=<Request [b'POST']>
 2024-08-02 16:28:53,098 - DEBUG- send_request_headers.complete
 2024-08-02 16:28:53,099 - DEBUG- send_request_body.started request=<Request [b'POST']>
 2024-08-02 16:28:53,100 - DEBUG- send_request_body.complete
 2024-08-02 16:28:53,102 - DEBUG- receive_response_headers.started request=<Request [b'POST']>
 2024-08-02 16:28:53,965 - DEBUG- receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 02 Aug 2024 20:28:54 GMT'), (b'Content-Type', b'applic

5

# Adding Generation in RAG
- Pass the retrieved Docs from vector store to LLM and get the final answer

In [47]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

prompt = PromptTemplate(
    input_variables=["question", "contexts"],
    template=""" You are a helpful assistant who answers user queries using the contexts provided. If the question
                cannot be answered using the contexts, you will say "I don't know".
                Contexts: {contexts} 

                Question: {question} """,
)
chain = LLMChain(llm=llm, prompt=prompt)

In [50]:

response = chain(inputs={
    "question":question,
    "contexts":"\n---\n".join([d.page_content for d in docs])
})

 2024-08-02 16:48:14,698 - DEBUG- Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': ' You are a helpful assistant who answers user queries using the contexts provided. If the question\n                cannot be answered using the contexts, you will say "I don\'t know".\n                Contexts: Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang\nRoss Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang\nAngela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic\nSergey Edunov Thomas Scialom\x03\nGenAI, Meta\nAbstract\nIn this work, we develop and release Llama 2, a collection of pretrained and ﬁne-tuned\nlarge language models (LLMs) ranging in scale from 7 billion to 70 billion parameters.\nOur ﬁne-tuned LLMs, called L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc , are optimized for dialogue use cases. Our\nmodels outperfo

 2024-08-02 16:48:14,811 - DEBUG- connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x0000023C4313B200>
 2024-08-02 16:48:14,813 - DEBUG- start_tls.started ssl_context=<ssl.SSLContext object at 0x0000023C639218D0> server_hostname='api.openai.com' timeout=None
 2024-08-02 16:48:14,857 - DEBUG- start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x0000023C576D30B0>
 2024-08-02 16:48:14,859 - DEBUG- send_request_headers.started request=<Request [b'POST']>
 2024-08-02 16:48:14,861 - DEBUG- send_request_headers.complete
 2024-08-02 16:48:14,862 - DEBUG- send_request_body.started request=<Request [b'POST']>
 2024-08-02 16:48:14,865 - DEBUG- send_request_body.complete
 2024-08-02 16:48:14,866 - DEBUG- receive_response_headers.started request=<Request [b'POST']>
 2024-08-02 16:48:16,535 - DEBUG- receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 02 Aug 2024 20:48:16 GMT'), (b'Content-Type', b'applic

In [51]:
response

{'question': ' Tell me about llama2?',
 'contexts': 'Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang\nRoss Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang\nAngela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic\nSergey Edunov Thomas Scialom\x03\nGenAI, Meta\nAbstract\nIn this work, we develop and release Llama 2, a collection of pretrained and ﬁne-tuned\nlarge language models (LLMs) ranging in scale from 7 billion to 70 billion parameters.\nOur ﬁne-tuned LLMs, called L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc , are optimized for dialogue use cases. Our\nmodels outperform open-source chat models on most benchmarks we tested, and based on\nourhumanevaluationsforhelpfulnessandsafety,maybeasuitablesubstituteforclosedsource models. We provide a detailed description of our approach to ﬁne-tuning and safety\n---\nasChatGPT,BARD,andClaude. TheseclosedproductLLMsareheavilyﬁne-tunedto

# Chaining everything in Sequential Chain

In [56]:
from langchain.chains import TransformChain
def retrieval_transform(inputs:dict)->dict:
    docs = retriever.get_relevant_documents(inputs['question'])
    docs = [doc.page_content for doc in docs]
    docs_dict ={
        "query":inputs["question"],
        "contexts":"\n--\n".join(docs)
    }
    return docs_dict
    
retreival_chain = TransformChain(
    input_variables=["question"],
    output_variables=["query","contexts"],
    transform=retrieval_transform
)

In [57]:
from langchain.chains import SequentialChain
rag_chain = SequentialChain(
    chains=[retreival_chain, chain],
    input_variables=["question"],
    output_variables=["query","contexts","text"],
    verbose=True
)

In [60]:
out = rag_chain({"question":question})
out["text"]

 2024-08-02 17:15:23,728 - DEBUG- Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': 'You are an AI language model assistant. Your task is \n    to generate 3 different versions of the given user \n    question to retrieve relevant documents from a vector  database. \n    By generating multiple perspectives on the user question, \n    your goal is to help the user overcome some of the limitations \n    of distance-based similarity search. Provide these alternative \n    questions separated by newlines. Original question:  Tell me about llama2?', 'role': 'user'}], 'model': 'gpt-3.5-turbo', 'logprobs': False, 'n': 1, 'stream': False, 'temperature': 0.0}}
 2024-08-02 17:15:23,731 - DEBUG- Sending HTTP Request: POST https://api.openai.com/v1/chat/completions
 2024-08-02 17:15:23,734 - DEBUG- close.started
 2024-08-02 17:15:23,736 - DEBUG- close.complete
 2024-08-02 17:15:23,737 - DEBUG- connect_tcp.started host='api.openai.

 2024-08-02 17:15:23,823 - DEBUG- connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x0000023C63D86180>
 2024-08-02 17:15:23,824 - DEBUG- start_tls.started ssl_context=<ssl.SSLContext object at 0x0000023C639218D0> server_hostname='api.openai.com' timeout=None
 2024-08-02 17:15:23,866 - DEBUG- start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x0000023C63D86240>
 2024-08-02 17:15:23,868 - DEBUG- send_request_headers.started request=<Request [b'POST']>
 2024-08-02 17:15:23,871 - DEBUG- send_request_headers.complete
 2024-08-02 17:15:23,875 - DEBUG- send_request_body.started request=<Request [b'POST']>
 2024-08-02 17:15:23,876 - DEBUG- send_request_body.complete
 2024-08-02 17:15:23,878 - DEBUG- receive_response_headers.started request=<Request [b'POST']>




[1m> Entering new SequentialChain chain...[0m


 2024-08-02 17:15:24,694 - DEBUG- receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 02 Aug 2024 21:15:24 GMT'), (b'Content-Type', b'application/json'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'openai-organization', b'user-cgptltwq9ygldnaumgtpmpwd'), (b'openai-processing-ms', b'576'), (b'openai-version', b'2020-10-01'), (b'strict-transport-security', b'max-age=15552000; includeSubDomains; preload'), (b'x-ratelimit-limit-requests', b'10000'), (b'x-ratelimit-limit-tokens', b'200000'), (b'x-ratelimit-remaining-requests', b'9999'), (b'x-ratelimit-remaining-tokens', b'199867'), (b'x-ratelimit-reset-requests', b'8.64s'), (b'x-ratelimit-reset-tokens', b'39ms'), (b'x-request-id', b'req_86d773b74a33a7a1a87745f429ff375c'), (b'CF-Cache-Status', b'DYNAMIC'), (b'X-Content-Type-Options', b'nosniff'), (b'Server', b'cloudflare'), (b'CF-RAY', b'8ad1154368680588-IAD'), (b'Content-Encoding', b'gzip'), (b'alt-svc', b'h3=":443"; ma=86400


[1m> Finished chain.[0m


'Llama 2 is a collection of pretrained and fine-tuned large language models (LLMs) ranging in scale from 7 billion to 70 billion parameters. The fine-tuned LLMs, called L/l.sc/a.sc/m.sc/a.sc /two.taboldstyle-C/h.sc/a.sc/t.sc, are optimized for dialogue use cases and outperform open-source chat models on most benchmarks tested. They are intended for commercial and research use in English for assistant-like chat applications. More information can be found in the provided contexts.'

# Custom MultiQuery

In [70]:
from typing import List
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

#output parser will split the LLM result into a list of queries
class LineList(BaseModel):
    lines: List[str] = Field(description="List of text")

class LineListOutputParser(PydanticOutputParser):
    def __init__(self)->None:
        super().__init__(pydantic_object=LineList)
    
    def parse(self, text: str) -> LineList:
        lines = text.strip().split("\n")
        return LineList(lines=lines)
    
output_parser = LineListOutputParser()

In [73]:
template ="""Your task is to generate 3 different search queries that aim to answer the user question from multiple perspetives.
            Each query MUST tackle the question from a different perspective. 
            Provide these alternative questions separated by newlines.
            
            Original question: {question}
            """
Query_Prompt = PromptTemplate(template=template, input_variables=["question"])
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.7)
llm_chain = LLMChain(llm=llm, prompt=Query_Prompt, output_parser= output_parser)

 2024-08-02 18:22:19,856 - DEBUG- load_ssl_context verify=True cert=None trust_env=True http2=False
 2024-08-02 18:22:19,858 - DEBUG- load_verify_locations cafile='c:\\Maran\\StudyMaterials\\Git\\LangChain\\Multi-Query-RAG\\venv\\Lib\\site-packages\\certifi\\cacert.pem'
 2024-08-02 18:22:21,100 - DEBUG- load_ssl_context verify=True cert=None trust_env=True http2=False
 2024-08-02 18:22:21,101 - DEBUG- load_verify_locations cafile='c:\\Maran\\StudyMaterials\\Git\\LangChain\\Multi-Query-RAG\\venv\\Lib\\site-packages\\certifi\\cacert.pem'


In [77]:
llm_chain.invoke("What is llama2?")

 2024-08-02 18:37:19,851 - DEBUG- Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': 'Your task is to generate 3 different search queries that aim to answer the user question from multiple perspetives.\n            Each query MUST tackle the question from a different perspective. \n            Provide these alternative questions separated by newlines.\n            \n            Original question: What is llama2?\n            ', 'role': 'user'}], 'model': 'gpt-3.5-turbo', 'logprobs': False, 'n': 1, 'stream': False, 'temperature': 0.7}}
 2024-08-02 18:37:19,854 - DEBUG- Sending HTTP Request: POST https://api.openai.com/v1/chat/completions
 2024-08-02 18:37:19,857 - DEBUG- close.started
 2024-08-02 18:37:19,859 - DEBUG- close.complete
 2024-08-02 18:37:19,861 - DEBUG- connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=None socket_options=None
 2024-08-02 18:37:19,965 - DEBUG- connect_tcp.complete

OutputParserException: Failed to parse LineList from completion 1. Got: 1 validation error for LineList
  Input should be a valid dictionary or instance of LineList [type=model_type, input_value=1, input_type=int]
    For further information visit https://errors.pydantic.dev/2.8/v/model_type

In [76]:
# Run the query
retriever = MultiQueryRetriever(
    retriever = vectorstore.as_retriever(),
    llm_chain= llm_chain,
    parser_key="lines"
)
docs = retriever.get_relevant_documents(query=question)
len(docs)

 2024-08-02 18:25:25,843 - DEBUG- Request options: {'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'content': 'Your task is to generate 3 different search queries that aim to answer the user question from multiple perspetives.\n            Each query MUST tackle the question from a different perspective. \n            Provide these alternative questions separated by newlines.\n            \n            Original question:  Tell me about llama2?\n            ', 'role': 'user'}], 'model': 'gpt-3.5-turbo', 'logprobs': False, 'n': 1, 'stream': False, 'temperature': 0.7}}
 2024-08-02 18:25:25,846 - DEBUG- Sending HTTP Request: POST https://api.openai.com/v1/chat/completions
 2024-08-02 18:25:25,848 - DEBUG- close.started
 2024-08-02 18:25:25,849 - DEBUG- close.complete
 2024-08-02 18:25:25,851 - DEBUG- connect_tcp.started host='api.openai.com' port=443 local_address=None timeout=None socket_options=None


 2024-08-02 18:25:25,945 - DEBUG- connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x0000023C63BC8500>
 2024-08-02 18:25:25,946 - DEBUG- start_tls.started ssl_context=<ssl.SSLContext object at 0x0000023C63B4F6D0> server_hostname='api.openai.com' timeout=None
 2024-08-02 18:25:25,982 - DEBUG- start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x0000023C5A957A40>
 2024-08-02 18:25:25,984 - DEBUG- send_request_headers.started request=<Request [b'POST']>
 2024-08-02 18:25:25,986 - DEBUG- send_request_headers.complete
 2024-08-02 18:25:25,988 - DEBUG- send_request_body.started request=<Request [b'POST']>
 2024-08-02 18:25:25,990 - DEBUG- send_request_body.complete
 2024-08-02 18:25:25,992 - DEBUG- receive_response_headers.started request=<Request [b'POST']>
 2024-08-02 18:25:26,920 - DEBUG- receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 02 Aug 2024 22:25:27 GMT'), (b'Content-Type', b'applic

OutputParserException: Failed to parse LineList from completion 1. Got: 1 validation error for LineList
  Input should be a valid dictionary or instance of LineList [type=model_type, input_value=1, input_type=int]
    For further information visit https://errors.pydantic.dev/2.8/v/model_type