### RAG Application

#### Import all required libraries

In [1]:
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain.document_loaders.pdf import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
import pinecone 
import os

  from tqdm.autonotebook import tqdm


#### Load environment variables

In [2]:
# python-dotenv helper for reading a `.env` file.
from dotenv import load_dotenv

# Load variables from a `.env` file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key that ChatOpenAI and
# OpenAIEmbeddings rely on further down — confirm against the project's `.env`.
load_dotenv()

True

#### Function for extracting sources from documents

In [3]:
def extract_source(docs):
    """Collect the ``source`` metadata entry of every document.

    Args:
        docs: Iterable of objects exposing a ``metadata`` mapping that
            contains a ``'source'`` key.

    Returns:
        A list with one source value per document, in input order
        (duplicates are kept).

    Raises:
        KeyError: If a document's metadata has no ``'source'`` entry.
    """
    return [document.metadata['source'] for document in docs]

#### Function for formatting documents

In [4]:
def format_docs(docs):
    """Merge the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` of each document, in input order, separated by
        a blank line (``"\\n\\n"``). An empty input yields an empty string.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

#### Pull the `rlm/rag-prompt` prompt from LangChain Hub

In [18]:
# Fetch the "rlm/rag-prompt" prompt from LangChain Hub for inspection.
# RAG() below pulls its own copy, so this global is only used by the
# exploratory cells that follow.
prompt = hub.pull("rlm/rag-prompt")

# Show the whole prompt object: input variables, hub metadata and messages.
print(prompt)

input_variables=['context', 'question'] metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))]


In [None]:
# Names of the placeholders the prompt expects to be filled in
print(prompt.input_variables)

['context', 'question']


In [None]:
# Raw template text of the prompt's first message
print(prompt.messages[0].prompt.template)

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:


#### RAG function

In [14]:
# Per-kernel cache for the Chroma vector store built from the sample PDF.
# Loading, chunking and embedding the document does not depend on the query,
# so it is done once instead of on every RAG() call (each rebuild costs
# embedding API requests).
# NOTE(review): rebuilding with Chroma.from_documents on every call may also
# add duplicate chunks to Chroma's default in-process collection — confirm.
_RAG_CACHE = {}


def _get_vector_store():
    """Return the Chroma store for ``sample_doc/doc.pdf``, building it on first use."""
    if "vector_store" not in _RAG_CACHE:
        # loading documents
        documents = PyPDFLoader(file_path="sample_doc/doc.pdf").load()

        # chunking (splitting document into chunks)
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        chunks = text_splitter.split_documents(documents)

        # embedding documents into vector store
        _RAG_CACHE["vector_store"] = Chroma.from_documents(
            documents=chunks,
            embedding=OpenAIEmbeddings(),
        )
    return _RAG_CACHE["vector_store"]


def RAG(query: str, top_k: int = 3, source: bool = False) -> "str | tuple[str, list[str]]":
    """Answer ``query`` with retrieval-augmented generation over the sample PDF.

    The PDF is indexed into a Chroma vector store on the first call and the
    store is reused afterwards; re-run this cell to force a rebuild (for
    example after the PDF changed).

    Args:
        query: The question to answer.
        top_k: Number of chunks to retrieve as context.
        source: When True, also return the ``source`` metadata of the
            retrieved chunks.

    Returns:
        The answer string when ``source`` is False, otherwise a tuple
        ``(answer, sources)`` where ``sources`` has one entry per retrieved
        chunk.
    """
    # llm model
    llm = ChatOpenAI()

    # prompt (expects the variables "context" and "question")
    prompt = hub.pull("rlm/rag-prompt")

    # output parser
    output_parser = StrOutputParser()

    # retriever over the cached vector store; only `k` varies per call
    retriever = _get_vector_store().as_retriever(
        search_kwargs={"k": top_k},
    )

    if source:
        ### response with source ###
        # Answer chain fed by a dict that already holds the retrieved docs under
        # "context"; the docs are flattened to text just before prompting.
        rag_chain_from_docs = (
            RunnablePassthrough.assign(context=(lambda x: format_docs(x['context'])))
            | prompt
            | llm
            | output_parser
        )

        # retrieved docs for the question
        retrieved_docs = (lambda x: x["question"]) | retriever

        # First add the raw docs as "context", then add the generated "answer",
        # so the docs stay available for source extraction.
        rag_source_chain = RunnablePassthrough.assign(context=retrieved_docs).assign(
            answer=rag_chain_from_docs)

        response = rag_source_chain.invoke({"question": query})

        sources = extract_source(response['context'])

        return response['answer'], sources

    ### response without source ###
    rag_chain = (
        {"context": retriever | RunnableLambda(format_docs), "question": RunnablePassthrough()}
        | prompt
        | llm
        | output_parser
    )

    # invoke the chain & get the response
    return rag_chain.invoke(query)

#### RAG response without Source

In [15]:
# Ask a question and get only the answer text (source defaults to False)
response = RAG("What is Attention?")

response

'Attention is a function that maps a query and key-value pairs to an output using vectors. The output is calculated as a weighted sum.'

#### RAG response with Source 

In [16]:
# Raw return value with source=True: an (answer, sources) tuple
RAG("What is Attention mechanism?", source=True)

('An attention mechanism is a function that maps a query and key-value pairs to an output using vectors. It computes the output as a weighted sum of the values based on the query and keys provided.',
 ['sample_doc/doc.pdf', 'sample_doc/doc.pdf', 'sample_doc/doc.pdf'])

In [17]:
# Unpack the (answer, sources) tuple returned when source=True
response, sources = RAG("What is Attention mechanism?", source=True)

print("Answer: ", response)
print("Sources: ", sources)

Answer:  An attention mechanism is a function that maps a query and key-value pairs to an output using vectors. The output is calculated as a weighted sum based on the query and key-value pairs.
Sources:  ['sample_doc/doc.pdf', 'sample_doc/doc.pdf', 'sample_doc/doc.pdf']
