# Build Local RAG

In [10]:
from langchain_core.prompts import PromptTemplate
from langchain.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore
import pinecone, requests

### 1- Download research papers

In [None]:
def get_paper_titles(topic, limit=15, timeout=10):
    """Return paper titles suggested by the Semantic Scholar autocomplete API.

    Args:
        topic: Search text sent as the ``query`` parameter.
        limit: Value sent as the ``limit`` parameter (defaults to 15).
        timeout: Seconds to wait for the HTTP call before giving up, so a
            stalled connection cannot hang the notebook.

    Returns:
        list: The ``title`` of every entry under the ``matches`` key of the
        response. An empty list is returned (after printing a message) when
        the request fails, the status code is not 200 or the body is not
        valid JSON.
    """
    base_url = "https://api.semanticscholar.org/graph/v1/paper/autocomplete"
    params = {
        "query": topic,
        "limit": limit
    }

    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported like any other failure
        print("Failed to fetch data from Semantic Scholar API")
        return []

    if response.status_code != 200:
        print("Failed to fetch data from Semantic Scholar API")
        return []

    try:
        data = response.json()
    except ValueError:
        print("Failed to fetch data from Semantic Scholar API")
        return []

    # A payload without "matches" (or with untitled entries) yields no titles
    # instead of raising KeyError
    matches = data.get("matches") or []
    return [paper["title"] for paper in matches if paper.get("title")]


In [None]:
# Look up paper titles for one topic and show what came back
topic = "Machine learning"
paper_titles = get_paper_titles(topic)
print(f"Titles of papers related to {topic} : {paper_titles}")

Titles of papers related to Machine learning : ['Machine learning - a probabilistic perspective', 'Machine learning in automated text categorization', 'Machine learning: Trends, perspectives, and prospects', 'Machine Learning for High-Speed Corner Detection', 'Machine Learning, Neural and Statistical Classification', 'Machine learning for molecular and materials science', 'Machine Learning in Medicine', 'Machine learning applications in cancer prognosis and prediction', 'MACHINE LEARNING An Artificial Intelligence Approach', 'Machine Learning With Python']


In [None]:
from scidownl import scihub_download

def download_multi_papers(sources, out="./papers/"):
    """Download several papers, one ``scihub_download`` call per title.

    Args:
        sources: Iterable of paper titles; each one is looked up with
            ``paper_type='title'``.
        out: Destination handed to ``scihub_download``. Defaults to
            ``./papers/``, the folder the PDF loading step reads from.
    """
    for title in sources:
        scihub_download(title, paper_type='title', out=out)

In [None]:
# Download the papers whose titles were collected in `paper_titles`
download_multi_papers(paper_titles)

### 2- Build retriever chain

In [None]:
def load_pdf(data):
    """Load the PDF files of a directory.

    Args:
        data: Path of the directory; files matching ``*.pdf`` are read with
            ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    pdf_loader = DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader)
    return pdf_loader.load()

In [None]:
# Load the PDF files stored under the "papers" folder
data = load_pdf("./papers")

In [None]:
# Number of items returned by the PDF loader
print(len(data))

97


In [None]:
def text_splitter(data):
    """Split loaded documents into smaller chunks.

    Args:
        data: Documents to split.

    Returns:
        The result of ``split_documents`` on a ``RecursiveCharacterTextSplitter``
        configured with ``chunk_size=300`` and ``chunk_overlap=20``.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=300,
        chunk_overlap=20,
    ).split_documents(data)

In [None]:
# Split the loaded documents into text chunks
text_chunks = text_splitter(data)

In [11]:
def download_hugging_face_embeddings():
    """Build the embedding model used for indexing and querying.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
# Instantiate the embedding model once; later cells reuse `embeddings`
embeddings = download_hugging_face_embeddings()

In [None]:
# Embed a throwaway query to find out how long the embedding vectors are;
# `dims` is the value used when the Pinecone index is created
query_text = embeddings.embed_query('hello')
dims = len(query_text)
print("embeddings dimension:", dims)

embeddings dimension: 384


In [13]:
import os

# Pinecone setup
# API key: read from the environment so the secret never has to be typed into
# (and saved with) the notebook. Falls back to an empty string when unset.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")

# Index name
index_name = "research-assistant"

In [None]:
# Create the Pinecone index only when it does not exist yet, so this cell can be
# re-run without failing on an already existing index
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=dims,  # must match the length of the embedding vectors
        metric="cosine",
        spec=pinecone.ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

In [None]:
# Create data embeddings for each chunk and store vectors into Pinecone.
# Only the chunk text (page_content) is passed here; no metadata argument is given.
docsearch = PineconeVectorStore.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)

In [14]:
# Load the previously stored Pinecone index by name; this rebinds `docsearch`
# without passing any texts
docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

In [None]:
# Test similarity search using the stored vectors: request k=3 results for a
# sample question and print the returned documents
query = "What is Machine learning?"
docs = docsearch.similarity_search(query, k=3)
print(docs)

[Document(page_content='“... [W]e define machine learning as a set of methods that \ncan automatically detect patterns in data, and then use the uncovered patterns to predict future data, or to perform other kinds of decision making under uncertainty (such as planning how to collect more data!).” (p.1)'), Document(page_content='machine learning is likely to be one of the most\ntransformative technologies of the 21st century.Although it is impossible to predict the future, it\nappears essential that society begin now to con-\nsider how to maximize its benefits.\nREFERENCES'), Document(page_content='approach to machine learning is closely related to the field of statistics, but differs slightly in terms of its emphasis and terminology,” p.1). Let us unite!')]


In [15]:
# Expose the vector store through the retriever interface used by later cells
retriever = docsearch.as_retriever()

In [None]:
# Load the llama3 chat model through Ollama with temperature=0
llm = ChatOllama(model="llama3", temperature=0)

# Prompt: a system instruction followed by a user turn with two placeholders,
# {question} and {context}, wrapped in Llama 3 style header tokens
prompt = PromptTemplate(
    template="""
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>
    You are a research assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question}
    Context: {context}
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question", "context"],
)

# RAG chain: fill the prompt, call the model, return the reply as a plain string
rag_chain = prompt | llm | StrOutputParser()

In [None]:
# Try it out
query = "What is Machine learning?"
docs = retriever.invoke(query)
# Give the prompt the text of the retrieved chunks rather than the repr of the
# Document objects, so the context holds only readable passages
context = "\n\n".join(doc.page_content for doc in docs)
answer = rag_chain.invoke({"question": query, "context": context})
print(answer)

According to the provided context, machine learning is defined as a set of methods that can automatically detect patterns in data and use those patterns to predict future data or make decisions under uncertainty. It's an approach closely related to statistics but with a different emphasis and terminology.


# Build Evaluation/Routing agents

### 1- Questions routing agent

In [None]:
# Use the JSON output format so the reply can be parsed by JsonOutputParser
llm = ChatOllama(model="llama3", format="json", temperature=0)

# Routing prompt with two placeholders: {topics} (what the vectorstore covers)
# and {question}. The model is asked for a JSON object with one key, 'datasource'.
# NOTE: this demo prompt asks for 'web_search'; the template re-defined for the
# graph further down asks for 'websearch' instead.
router_prompt = PromptTemplate(
    template="""
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>
    You are an expert at routing a user question to a vectorstore or web search. Use the vectorstore for questions realted to {topics}.
    You do not need to be stringent with the keywords in the question related to these topics. Otherwise, use web-search.
    Return a binary choice 'web_search' or 'vectorstore' based on the question. Return the a JSON with a single key 'datasource' and
    no premable or explaination.
    Question to route: {question}
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question", "topics"],
)

# define local data topics
topics = "machine learning, deep learning, artificial intelligence"

# Router chain: prompt -> JSON-format model -> parsed dict
router_chain = router_prompt | llm | JsonOutputParser()

# Route one sample question and print the parsed result
question = "what is text classification?"
result = router_chain.invoke({"question": question, "topics": topics})
print(result)

{'datasource': 'vectorstore'}


### 2- Retrieval Document grader agent

In [None]:
# Use the JSON output format so the reply can be parsed by JsonOutputParser
llm = ChatOllama(model="llama3", format="json", temperature=0)

# Grading prompt with two placeholders, {document} and {question}; the model is
# asked for a JSON object with one key, 'score', holding 'yes' or 'no'
retrieval_prompt = PromptTemplate(
    template="""
    <|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a grader assessing relevance of a retrieved document to a user question. If the document contains keywords related to the user question,
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no premable or explaination.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question", "document"],
)

# Grader chain: prompt -> JSON-format model -> parsed dict
retrieval_grader = retrieval_prompt | llm | JsonOutputParser()
question = "artificial intelligence"
docs = retriever.invoke(question)
# Grade only the second retrieved document (index 1) as a quick check
print(retrieval_grader.invoke({"question": question, "document": docs[1].page_content}))

{'score': 'yes'}


### 3- LLM Answer Grader Agent

In [None]:
# Answer-grading prompt with two placeholders, {generation} and {question}; the
# model is asked for a JSON object with one key, 'score', holding 'yes' or 'no'
grader_prompt = PromptTemplate(
    template="""
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>
    You are a grader assessing whether an answer is useful to resolve a question. Return a binary score 'yes' or 'no' to indicate whether the answer is
    useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the answer:
    \n ------- \n
    {generation}
    \n ------- \n
    Here is the question: {question}
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "question"],
)

# `llm` is not redefined in this cell: the chain uses whichever model the
# previously executed cell bound to that name
grader_chain = grader_prompt | llm | JsonOutputParser()

# use question & generation from RAG obtained earlier
result = grader_chain.invoke({"generation": answer, "question": query})
print(result)

{'score': 'yes'}


### 4- Hallucination Grader Agent

In [None]:
### We will not use this agent in our graph

# Grounding prompt with two placeholders, {documents} (the facts) and
# {generation} (the answer); the model is asked for a JSON object with one key,
# 'score', holding 'yes' or 'no'
hallucination_prompt = PromptTemplate(
    template="""
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>
    You are a grader assessing whether an answer is grounded in / supported by a set of facts.
    Give a binary 'yes' or 'no' score to indicate whether the answer is grounded in / supported by a set of facts.
    Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here are the facts:
    \n ------- \n
    {documents}
    \n ------- \n
    Here is the answer: {generation}
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "documents"],
)

hallucination_grader_chain = hallucination_prompt | llm | JsonOutputParser()

# retrieve dummy documents to test the grader
docs = retriever.invoke("what is regression?")
# The facts must be the retrieved documents (their text), not the question
# string: grading the answer against `query` never checks any grounding
result = hallucination_grader_chain.invoke(
    {"generation": answer, "documents": "\n\n".join(doc.page_content for doc in docs)}
)
print(result)

{'score': 'yes'}


### 5- Web search tool

In [16]:
# Never store the Tavily key in the notebook: use the value already present in
# the environment and only prompt for it (without echo) when it is missing.
# NOTE: any key that was ever saved inside a notebook should be treated as
# leaked and rotated.
if not os.environ.get("TAVILY_API_KEY"):
    import getpass

    os.environ["TAVILY_API_KEY"] = getpass.getpass("Tavily API key: ")

In [17]:
from langchain_community.tools.tavily_search import TavilySearchResults

# Tavily web-search tool instance (the RAGAgents class below builds its own)
web_search = TavilySearchResults()

# Build global RAG graph

### 1- Set graph state

In [18]:
from typing_extensions import TypedDict
from typing import List

### Our graph state
class GraphState(TypedDict):
    """
    State dictionary handed from node to node in the graph.

    Keys:
        question: the question being answered
        generation: the answer produced by the LLM
        web_search: flag telling whether a web search should be added
        documents: the documents gathered for the question
    """
    question: str
    generation: str
    web_search: str
    documents: List[str]

### 2- Combine RAG Agents into one class

In [21]:
# Re-define every prompt template needed by the graph as a module-level string,
# so the RAGAgents class can build its chains from them

# RAG prompt — placeholders: {question}, {context}
rag_prompt_template = """
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>
    You are a research assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question}
    Context: {context}
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

# Question routing prompt — placeholders: {topics}, {question}.
# The two labels it asks for ('websearch' / 'vectorstore') are the keys used by
# the graph's conditional entry point.
router_prompt_template = """
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>
    You are an expert at routing a user question to a vectorstore or web search. Use the vectorstore for questions realted to {topics}.
    You do not need to be stringent with the keywords in the question related to these topics. Otherwise, use web-search.
    Return a binary choice 'websearch' or 'vectorstore' based on the question. Return the a JSON with a single key 'datasource' and
    no premable or explaination.
    Question to route: {question}
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

# Document retrieval grading prompt — placeholders: {document}, {question};
# asks for a JSON object with a single 'score' key ('yes' or 'no')
retrieval_prompt_template = """
    <|begin_of_text|><|start_header_id|>system<|end_header_id|>
    You are a grader assessing relevance of a retrieved document to a user question. If the document contains keywords related to the user question,
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no premable or explaination.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """

# LLM generation grading prompt — placeholders: {generation}, {question};
# asks for a JSON object with a single 'score' key ('yes' or 'no')
grader_prompt_template = """
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>
    You are a grader assessing whether an answer is useful to resolve a question. Return a binary score 'yes' or 'no' to indicate whether the answer is
    useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the answer:
    \n ------- \n
    {generation}
    \n ------- \n
    Here is the question: {question}
    <|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

In [22]:
class RAGAgents:
    """Holds the tools and LLM chains that the graph nodes rely on.

    Attributes:
        web_search: Tavily web-search tool
        retriever: document retriever supplied by the caller
        rag_chain: prompt -> llama3 -> string answer
        router_chain: prompt -> llama3 (JSON format) -> parsed JSON
        retrieval_grader: prompt -> llama3 (JSON format) -> parsed JSON
        answer_grader_chain: prompt -> llama3 (JSON format) -> parsed JSON
    """

    def __init__(self, retriever):
        """
        Args:
            retriever: Document retriever kept on the instance as ``self.retriever``
        """

        def build_chain(template, variables, model, parser):
            # One chain = prompt template piped into a model and an output parser
            chain_prompt = PromptTemplate(template=template, input_variables=variables)
            return chain_prompt | model | parser

        # External tools
        self.web_search = TavilySearchResults()
        self.retriever = retriever

        # Two llama3 models served by Ollama: free text for the answer,
        # JSON format for every routing / grading decision
        text_llm = ChatOllama(model="llama3", temperature=0)
        json_llm = ChatOllama(model="llama3", format="json", temperature=0)

        self.rag_chain = build_chain(
            rag_prompt_template, ["question", "context"], text_llm, StrOutputParser()
        )
        self.router_chain = build_chain(
            router_prompt_template, ["question", "topics"], json_llm, JsonOutputParser()
        )
        self.retrieval_grader = build_chain(
            retrieval_prompt_template, ["question", "document"], json_llm, JsonOutputParser()
        )
        self.answer_grader_chain = build_chain(
            grader_prompt_template, ["generation", "question"], json_llm, JsonOutputParser()
        )

### 3- Setup graph nodes

In [19]:
from langchain.schema import Document

### Nodes
class Nodes():
    """
    Node and conditional-edge callables of the RAG graph.

    Node methods take the current graph state (a dict) and return a partial
    state update; edge methods return the name of the next step.
    """

    def __init__(self, retriever):
        """
        Args:
            retriever: Document retriever handed over to RAGAgents
        """
        self.agents = RAGAgents(retriever)

    @staticmethod
    def _is_yes(score):
        """Tell whether a grader result carries a 'yes' score (case-insensitive)."""
        if not isinstance(score, dict):
            return False
        # str() keeps a non-string score (e.g. a JSON boolean) from crashing .lower()
        return str(score.get("score", "")).strip().lower() == "yes"

    @staticmethod
    def _format_context(documents):
        """Join the text of the documents into one context string for the RAG prompt."""
        return "\n\n".join(getattr(d, "page_content", str(d)) for d in documents or [])

    def route_question(self, state):
        """
        Route question to web search or RAG.

        Args:
            state (dict): The current graph state

        Returns:
            str: Next node to call, always "vectorstore" or "websearch"
        """

        print("---ROUTE QUESTION---")
        # define local data topics
        topics = "machine learning, deep learning, artificial intelligence"
        question = state["question"]
        source = self.agents.router_chain.invoke({"question": question, "topics": topics})

        # The graph only knows the two labels below. Anything else the model
        # returns (e.g. 'web_search', a missing key) falls back to web search
        # instead of breaking the conditional entry point.
        raw_choice = source.get("datasource", "") if isinstance(source, dict) else ""
        if str(raw_choice).strip().lower() == "vectorstore":
            datasource = "vectorstore"
        else:
            datasource = "websearch"
        print(f"---ROUTE QUESTION TO: {datasource}---")
        return datasource

    def retrieve_documents(self, state):
        """
        Retrieve documents from vectorstore

        Args:
            state (dict): The current graph state

        Returns:
            state (dict): New key added to state, documents, that contains retrieved documents
        """
        print("---RETRIEVE DOCUMENTS---")
        question = state["question"]

        # Retrieval
        documents = self.agents.retriever.invoke(question)
        return {"documents": documents, "question": question}

    def generate_with_rag(self, state):
        """
        Generate answer using RAG on retrieved documents

        Args:
            state (dict): The current graph state

        Returns:
            state (dict): New key added to state, generation, that contains LLM generation
        """
        print("---GENERATE WITH RAG---")
        question = state["question"]
        documents = state["documents"]

        # The prompt receives the documents' text, not the repr of the objects
        context = self._format_context(documents)
        generation = self.agents.rag_chain.invoke({"question": question, "context": context})
        return {"documents": documents, "question": question, "generation": generation}

    def grade_retrieved_documents(self, state):
        """
        Determines whether the retrieved documents are relevant to the question
        If any document is not relevant, or no document is left at all, we set
        a flag to run web search

        Args:
            state (dict): The current graph state

        Returns:
            state (dict): Filtered out irrelevant documents and updated web_search state
        """

        print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
        question = state["question"]
        documents = state.get("documents") or []

        # Score each doc
        filtered_docs = []
        web_search = "No"
        for d in documents:
            score = self.agents.retrieval_grader.invoke({"question": question, "document": d.page_content})
            if self._is_yes(score):
                # Document relevant
                filtered_docs.append(d)
            else:
                # Document not relevant: drop it and ask for a web search
                web_search = "Yes"

        # Nothing relevant to answer from (including an empty retrieval):
        # generation would run without context, so request a web search
        if not filtered_docs:
            web_search = "Yes"
        return {"documents": filtered_docs, "question": question, "web_search": web_search}

    def perform_web_search(self, state):
        """
        Web search based based on the question

        Args:
            state (dict): The current graph state

        Returns:
            state (dict): Appended web results to documents
        """

        print("---WEB SEARCH---")
        question = state["question"]
        # "documents" is absent when the router sends the question straight to
        # web search, so it must not be read with state["documents"]
        documents = state.get("documents")

        # Web search
        docs = self.agents.web_search.invoke({"query": question})
        # Keep only well-formed results so an unexpected payload yields an
        # empty web document instead of an exception
        hits = docs if isinstance(docs, list) else []
        web_results = "\n".join([d["content"] for d in hits if isinstance(d, dict) and "content" in d])
        web_results = Document(page_content=web_results)

        # Build a new list rather than appending to the one held by the state
        documents = list(documents) if documents is not None else []
        documents.append(web_results)
        return {"documents": documents, "question": question}

    def decide_to_use_web_search(self, state):
        """
        Determines whether to generate an answer, or add web search

        Args:
            state (dict): The current graph state

        Returns:
            str: Binary decision for next node to call
        """

        print("---ASSESS GRADED DOCUMENTS---")
        web_search = state["web_search"]

        if web_search == "Yes":
            # The grading step raised the flag (at least one document was
            # dropped, or none was left): complete the context with web search
            print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, INCLUDE WEB SEARCH---")
            return "websearch"
        else:
            # We have relevant documents, so generate answer
            print("---DECISION: GENERATE---")
            return "generate"

    def grade_generated_answer_vs_question(self, state):
        """
        Determines whether the generated answer does address to given question.

        Args:
            state (dict): The current graph state

        Returns:
            str: Decision for next node to call
        """

        print("---GRADE GENERATED ANSWER vs QUESTION---")
        question = state["question"]
        generation = state["generation"]

        # Check question-answering; the score is compared case-insensitively,
        # like the document grading above
        score = self.agents.answer_grader_chain.invoke({"question": question,"generation": generation})
        if self._is_yes(score):
            print("---DECISION: GENERATION ADDRESSES QUESTION---")
            return "useful"
        else:
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "not useful"

### 4- Build graph workflow

In [20]:
from langgraph.graph import END, StateGraph

class Worflow():
    """Assembles the RAG graph and exposes the compiled application as ``self.app``."""

    def __init__(self, docs_retriever):
        """
        Args:
            docs_retriever: Document retriever passed on to ``Nodes``
        """
        # Graph over the shared state, plus the callables implementing each step
        graph = StateGraph(GraphState)
        steps = Nodes(docs_retriever)

        # Nodes, registered under the names used by the edges below
        node_table = (
            ("retrieve_documents", steps.retrieve_documents),
            ("grade_retrieved_documents", steps.grade_retrieved_documents),
            ("generate", steps.generate_with_rag),
            ("websearch", steps.perform_web_search),
        )
        for node_name, node_fn in node_table:
            graph.add_node(node_name, node_fn)

        # Entry point: the router chooses between web search and the vectorstore
        entry_routes = {
            "websearch": "websearch",
            "vectorstore": "retrieve_documents"
        }
        graph.set_conditional_entry_point(steps.route_question, entry_routes)

        # Retrieved documents are always graded, then either completed with a
        # web search or used directly for generation
        graph.add_edge("retrieve_documents", "grade_retrieved_documents")
        after_grading = {
            "websearch": "websearch",
            "generate": "generate"
        }
        graph.add_conditional_edges(
            "grade_retrieved_documents", steps.decide_to_use_web_search, after_grading
        )

        # Web search results feed the generation step
        graph.add_edge("websearch", "generate")

        # A useful answer ends the run; otherwise go back to web search
        after_generation = {
            "useful": END,
            "not useful": "websearch"
        }
        graph.add_conditional_edges(
            "generate", steps.grade_generated_answer_vs_question, after_generation
        )

        # Compile
        self.app = graph.compile()

In [32]:
# Try it out
from pprint import pprint

docs_retriever = retriever

# start RAG graph workflow
app = Worflow(docs_retriever).app

inputs = {"question": "What is Machine learning?"}

# Remember the update emitted by the last node explicitly, rather than relying
# on the loop variable still being bound once the loops are over
final_state = None
for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}")
        final_state = value

if final_state is not None and "generation" in final_state:
    pprint(final_state["generation"])
else:
    # Empty stream, or a last step that produced no answer
    print("The workflow finished without producing a generation.")

---ROUTE QUESTION---
---ROUTE QUESTION TO: vectorstore---
---RETRIEVE DOCUMENTS---
'Finished running: retrieve_documents'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
'Finished running: grade_retrieved_documents'
---GENERATE WITH RAG---
---GRADE GENERATED ANSWER vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
'Finished running: generate'
('According to the provided context, machine learning is defined as a set of '
 'methods that can automatically detect patterns in data and use those '
 "patterns to predict future data or make decisions under uncertainty. It's an "
 'approach closely related to statistics but with a different emphasis and '
 'terminology.')
