# LLaMA3 Local RAG agent

![Drawing](diagram.png)


### Installing Required Packages

In [None]:
pip install -U langchain-nomic langchain_community tiktoken langchainhub langchain-openai chromadb langchain langgraph tavily-python nomic[local] langchain-text-splitters

### Setting Up Environment Variables

In [6]:
import getpass
import os

# LangSmith tracing configuration.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

# Secrets are taken from the environment when already set and prompted for otherwise,
# so a real key is never overwritten by a placeholder or stored in the notebook.
# TAVILY_API_KEY is what the Tavily web-search tool used in later cells reads.
for _secret_name in ("LANGCHAIN_API_KEY", "TAVILY_API_KEY"):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass.getpass(f"{_secret_name}: ")

### Setting Up Local Language Model

In [5]:
# Model name handed to every ChatOllama instance in this notebook.
local_llm: str = "llama3"

### Set up and index documents for retrieval

In [121]:
### Index

from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_nomic.embeddings import NomicEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Pages that make up the local knowledge base.
urls = [
    "https://www.oulu.fi/en/apply/international-programmes",
    "https://www.oulu.fi/en/apply/how-apply/applying-bachelors-programmes",
    "https://www.oulu.fi/en/apply/how-apply/applying-masters-programmes",
]

# Load each page (one list of documents per URL), then flatten into a single list.
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Split into chunks of 250 tokens (counted with tiktoken) with no overlap.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

# Add to vectorDB
embedding = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")

# The in-memory Chroma collection is shared by name for the lifetime of the kernel.
# Drop any earlier "rag-chroma" contents first; otherwise re-running this cell (or any
# other cell that indexes into the same collection) appends duplicate chunks and the
# retriever starts returning the same passage several times.
Chroma(collection_name="rag-chroma", embedding_function=embedding).delete_collection()

vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=embedding,
)
retriever = vectorstore.as_retriever()

### Set up a system to assess the relevance of retrieved documents to a user question

In [122]:
### Retrieval Grader

from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate

# LLM (JSON mode so the grade can be parsed by JsonOutputParser)
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt asking for a yes/no relevance grade as JSON with a single 'score' key.
prompt = PromptTemplate(
    template="""system You are a grader assessing relevance 
    of a retrieved document to a user question. If the document contains keywords related to the user question, 
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
     user
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n assistant
    """,
    input_variables=["question", "document"],
)

retrieval_grader = prompt | llm | JsonOutputParser()

# Get user input for the question
question = input("Please enter your question: ")

# Retrieve documents based on the user question
docs = retriever.invoke(question)
# Grade the first returned document (the same one main_workflow grades) and guard
# against an empty result instead of indexing a fixed position blindly.
doc_txt = docs[0].page_content if docs else ""

# Assess the relevance of the retrieved document
result = retrieval_grader.invoke({"question": question, "document": doc_txt})
print(result)

Please enter your question:  How many master's programs are taught in English?


{'score': 'yes'}


### Set up and perform a web search based on the user's question

In [11]:
### Search
from langchain_community.tools.tavily_search import TavilySearchResults
    
# Initialize the web search tool.
# The result limit on TavilySearchResults is the `max_results` field; `k` is not a
# documented field, so passing it does not cap the number of results at 3.
web_search_tool = TavilySearchResults(max_results=3)

# Perform a web search based on the user's question
search_results = web_search_tool.invoke({"query": question})

# Print or use the search results
print("Web search results:", search_results)

Web search results: [{'url': 'https://www.studying-in-germany.org/master-degree/', 'content': "Master's programs in Germany offer a diverse range of options, with many taught in English. They are known for their affordability, top-notch curricula, hands-on learning, and excellent career prospects. So, it's no wonder that master's degrees are the go-to choice for international students in Germany. During the 2021/2022 period, 42.6% of them (148,901 students) were […]"}, {'url': 'https://beyondthestates.com/the-ultimate-guide-to-masters-degrees-in-europe/', 'content': "Know this - there are over 8,231 accredited, 100% English-taught master's degrees to choose from across Europe; no foreign language required. More and more people from the US, and other countries, are heading to Europe for higher ed opportunities, and for good reason - there are so many affordable, high-quality English-taught programs ..."}, {'url': 'https://www.gooverseas.com/blog/10-universities-where-you-can-study-abroa

### Generate a targeted search query from the user's question and the retrieved document text, then run a web search with it

In [124]:
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_community.tools.tavily_search import TavilySearchResults

# Step 1: Initialize the Language Model (JSON mode, parsed by JsonOutputParser below)
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Step 2: Define Prompt Template for Analysis
# Step 7 reads the 'query' key from the parsed JSON, so the prompt asks for exactly
# that shape instead of relying on the model to pick the key name by itself.
prompt = PromptTemplate(
    template="""system You are an internet search query generator. \n
    Here is the combined text of all documents: {combined_text}
    Here is the user question: {question}
    Your task is to understand and interpret the user's question to generate a specific search query for Google.
    Provide the search query as a JSON with a single key 'query' and no preamble or explanation.

    Question: {question} 
    Context: {combined_text} 
    """,
    input_variables=["combined_text", "question"],
)

# Step 3: Retrieve and Combine Document Content
combined_text = "\n".join([doc.page_content for doc in docs])

# Step 4: Define Retrieval Analysis Pipeline
retrieval_analysis = prompt | llm | JsonOutputParser()

# Step 5: Invoke Retrieval Analysis with Combined Text and User Question
analysis_result = retrieval_analysis.invoke({"combined_text": combined_text, "question": question})

# Step 6: Print or Process the Analysis Result
print("Search query:", analysis_result)

# Step 7: Perform Web Search Based on Generated Query
# Fall back to the user's own question when the model output has no usable 'query'.
query = analysis_result.get("query") if isinstance(analysis_result, dict) else None
if not isinstance(query, str) or not query.strip():
    query = question

# Initialize the web search tool (`max_results` is the field that limits the result count)
web_search_tool = TavilySearchResults(max_results=3)

# Perform a web search based on the generated query
search_results = web_search_tool.invoke({"query": query})

# Step 8: Print or Use the Search Results
print("Web search results:", search_results)


Search query: {'query': "How many master's programs are taught in English at University of Oulu?", 'type': 'search'}
Web search results: [{'url': 'https://www.oulu.fi/en/apply/international-programmes', 'content': "Applying to Bachelor's Programmes\nApplying to Master's Programmes\nLet us help you in finding your match\nInternational Applicant's Guide 2024 - read it online\nTake our study choice test and find your programme\nPostal address\n +358 294 48 0000\nStreet address\nPentti Kaiteran katu 1\nLinnanmaa\nFooter links\nFooter links\nChat with us\nWe would like to talk to you via chat. UniOulu Ambassador Blog\nAdmissions Contact\nUniversity of Oulu in short\nFounded in 1958, University of Oulu is one of the largest universities in Finland and one of the northernmost universities. University rankings\nStudent Life\nSustainable development in education\nHow to apply\nTo make your applicant journey smoother and to help you gather all the necessary documents on time, here are all import

### Generate Answer from Retrieved Context

In [11]:
### Generate

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

# Prompt
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. 
    Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question} 
    Context: {context} 
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    # Must name the placeholders the template actually uses: {question} and {context}.
    input_variables=["question", "context"],
)

# No JSON mode here: the answer is free-form text parsed by StrOutputParser.
llm = ChatOllama(model=local_llm, temperature=0)


# Post-processing
def format_docs(docs):
    """Join the `page_content` of every item in `docs`, separated by a blank line."""
    contents = [entry.page_content for entry in docs]
    separator = "\n\n"
    return separator.join(contents)


# Chain: prompt -> model -> plain string output
rag_chain = prompt | llm | StrOutputParser()

# Generate answer from the question and the formatted retrieved documents
generation = rag_chain.invoke(
    {
        "question": question,
        "context": format_docs(docs),
    }
)
print("Generated Answer:", generation, sep="\n")

Generated Answer:
According to the provided context, the University of Oulu offers 21 Master's programs that are taught in English.


### Evaluate whether a generated answer is grounded in a set of facts

In [15]:
### Hallucination Grader

# LLM (JSON mode so the grade can be parsed by JsonOutputParser)
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt
prompt = PromptTemplate(
    template=""" <|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether 
    an answer is grounded in / supported by a set of facts. Give a binary 'yes' or 'no' score to indicate 
    whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a 
    single key 'score' and no preamble or explanation. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here are the facts:
    \n ------- \n
    {documents} 
    \n ------- \n
    Here is the answer: {generation}  <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["generation", "documents"],
)

hallucination_grader = prompt | llm | JsonOutputParser()
# Pass the documents' text (as main_workflow does) rather than the raw list, whose
# string form would put object reprs and metadata into the "facts" section.
hallucination_grader.invoke({"documents": format_docs(docs), "generation": generation})

{'score': 'yes'}

### Assess whether a generated answer is useful for resolving a question

In [14]:
### Answer Grader

# JSON-mode model so the grade can be parsed
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt requesting a yes/no usefulness grade under the single key 'score'
prompt = PromptTemplate(
    input_variables=["generation", "question"],
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether an 
    answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is 
    useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
     <|eot_id|><|start_header_id|>user<|end_header_id|> Here is the answer:
    \n ------- \n
    {generation} 
    \n ------- \n
    Here is the question: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
)

# Grade the answer produced by the generation cell against the question
answer_grader = prompt | llm | JsonOutputParser()
answer_grader.invoke({"generation": generation, "question": question})

{'score': 'yes'}

## Application

In [148]:
# Import necessary libraries
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_nomic.embeddings import NomicEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.documents import Document

### Index Documents

# Pages that make up the local knowledge base.
urls = [
    "https://www.oulu.fi/en/apply/international-programmes",
    "https://www.oulu.fi/en/apply/how-apply/applying-bachelors-programmes",
    "https://www.oulu.fi/en/apply/how-apply/applying-masters-programmes",
]

# Load each page (one list of documents per URL), then flatten into a single list.
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Split into chunks of 250 tokens (counted with tiktoken) with no overlap.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

# Add to vectorDB
embedding = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")

# The in-memory Chroma collection is shared by name for the lifetime of the kernel.
# Drop any earlier "rag-chroma" contents first; otherwise every run of an indexing
# cell appends duplicate chunks and the retriever returns repeated passages.
Chroma(collection_name="rag-chroma", embedding_function=embedding).delete_collection()

vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=embedding,
)
retriever = vectorstore.as_retriever()

### Retrieval Grader

# LLM (JSON mode; shared by the three graders in this cell)
local_llm = "llama3"
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt Template for Relevance Grading
relevance_prompt = PromptTemplate(
    template="""system You are a grader assessing relevance 
    of a retrieved document to a user question. If the document contains keywords related to the user question, 
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
     user
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n assistant
    """,
    input_variables=["question", "document"],
)

retrieval_grader = relevance_prompt | llm | JsonOutputParser()

### Web Search Tool Initialization
# The result limit on TavilySearchResults is the `max_results` field; `k` is not a
# documented field, so passing it does not cap the number of results at 3.
web_search_tool = TavilySearchResults(max_results=3)

### Generate Answer

# Prompt Template for Answer Generation
answer_prompt = PromptTemplate(
    template="""system You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. 
    Use three sentences maximum and keep the answer concise user
    Question: {question} 
    Context: {context} 
    Answer: assistant""",
    input_variables=["question", "context"],
)

# Separate model without JSON mode: answers are free-form text.
answer_llm = ChatOllama(model=local_llm, temperature=0)
rag_chain = answer_prompt | answer_llm | StrOutputParser()

# Format Documents Function
def format_docs(docs):
    """Join the `page_content` of every item in `docs`, separated by a blank line."""
    contents = [entry.page_content for entry in docs]
    separator = "\n\n"
    return separator.join(contents)

### Answer Grader

# Asks whether the answer is useful for the question; expects JSON with one 'score' key.
answer_grading_prompt = PromptTemplate(
    input_variables=["generation", "question"],
    template="""system You are a grader assessing whether an 
    answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is 
    useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
     user Here is the answer:
    \n ------- \n
    {generation} 
    \n ------- \n
    Here is the question: {question} assistant""",
)

answer_grader = answer_grading_prompt | llm | JsonOutputParser()

### Hallucination Grader

# Asks whether the answer is supported by the supplied facts; same JSON shape.
hallucination_prompt = PromptTemplate(
    input_variables=["generation", "documents"],
    template=""" system You are a grader assessing whether 
    an answer is grounded in / supported by a set of facts. Give a binary 'yes' or 'no' score to indicate 
    whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a 
    single key 'score' and no preamble or explanation. user
    Here are the facts:
    \n ------- \n
    {documents} 
    \n ------- \n
    Here is the answer: {generation}  assistant""",
)

hallucination_grader = hallucination_prompt | llm | JsonOutputParser()

### Main Workflow Function

def _is_yes(grade):
    """Return True when a grader result is a dict whose 'score' reads 'yes' (any case)."""
    if not isinstance(grade, dict):
        return False
    return str(grade.get("score", "")).strip().lower() == "yes"


def main_workflow(question):
    """Answer `question` from the indexed documents, or fall back to a web search.

    Retrieves documents for the question and grades the first one for relevance.
    When it is graded 'yes', an answer is generated from all retrieved documents,
    then checked by the answer grader and, if that passes, by the hallucination
    grader. In every other case (nothing retrieved, a 'no' grade, or a grade that
    cannot be read) a web search is run on the question instead.

    Everything is reported with print(); the function returns None.
    """
    # Retrieve documents based on the user question
    docs = retriever.invoke(question)

    if not docs:
        # Nothing to grade, so skip the LLM call entirely.
        print("No documents were retrieved. Performing a web search...")
        relevant = False
    else:
        # Only the first returned document is graded; generation below uses all of them.
        result = retrieval_grader.invoke(
            {"question": question, "document": docs[0].page_content}
        )
        # A missing or unexpected score counts as "not relevant" rather than raising
        # KeyError or silently doing nothing.
        relevant = _is_yes(result)
        if not relevant:
            print("The retrieved document is not relevant. Performing a web search...")

    if not relevant:
        # Perform a web search based on the user's question
        search_results = web_search_tool.invoke({"query": question})
        print("Web search results:", search_results)
        return

    print("The retrieved document is relevant. Generating an answer...")
    context = format_docs(docs)
    generation = rag_chain.invoke({"context": context, "question": question})
    print("Generated Answer:")
    print(generation)

    # Check the usefulness of the generated answer
    answer_result = answer_grader.invoke({"question": question, "generation": generation})
    if not _is_yes(answer_result):
        print("Answer Grader: Fail")
        return
    print("Answer Grader: Pass")

    # Check for hallucinations (only reached when the answer grader passed)
    hallucination_result = hallucination_grader.invoke(
        {"documents": context, "generation": generation}
    )
    if _is_yes(hallucination_result):
        print("Hallucination Grader: Pass")
    else:
        print("Hallucination Grader: Fail")

### Run the Workflow

# Read the question interactively, then hand it to the workflow defined above in this cell.
question = input("Please enter your question: ")
main_workflow(question)


Please enter your question:  How many programs are taught in the Finnish language?


The retrieved document is not relevant. Performing a web search...
Web search results: [{'url': 'https://www.helsinki.fi/en/admissions-and-education/open-university/open-university-studies-degree-programme/languages-and-literatures-finland/finnish-foreigners', 'content': "Open university: Finnish for foreigners. Open University studies from the Bachelor's Programme in the Languages and Literatures of Finland. You can study individual courses. At the Open University, you can study Beginner and Intermediate Level Courses (A1.1 - B2.1). Courses are taught in Finnish."}, {'url': 'https://www.helsinki.fi/en/admissions-and-education/international-students/studies-available-english', 'content': 'You can easily get by in Helsinki with English but learning the local language will give you more career options and can make your stay more rewarding. The University of Helsinki offers free Finnish language courses for enrolled exchange, visiting and international degree students. They are a fun way 

In [146]:
# Import necessary libraries
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_nomic.embeddings import NomicEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.documents import Document

### Index Documents

# Pages that make up the local knowledge base (this cell also indexes the eligibility page).
urls = [
    "https://www.oulu.fi/en/apply/international-programmes",
    "https://www.oulu.fi/en/apply/how-apply/applying-bachelors-programmes",
    "https://www.oulu.fi/en/apply/how-apply/applying-masters-programmes",
    "https://www.oulu.fi/en/apply/how-apply/eligibility",
]

# Load each page (one list of documents per URL), then flatten into a single list.
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Split into chunks of 250 tokens (counted with tiktoken) with no overlap.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

# Add to vectorDB
embedding = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")

# The in-memory Chroma collection is shared by name for the lifetime of the kernel.
# Drop any earlier "rag-chroma" contents first; otherwise every run of an indexing
# cell appends duplicate chunks and the retriever returns repeated passages.
Chroma(collection_name="rag-chroma", embedding_function=embedding).delete_collection()

vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=embedding,
)
retriever = vectorstore.as_retriever()

### Retrieval Grader

# LLM (JSON mode; shared by the three graders in this cell)
local_llm = "llama3"
llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Prompt Template for Relevance Grading
relevance_prompt = PromptTemplate(
    template="""system You are a grader assessing relevance 
    of a retrieved document to a user question. If the document contains keywords related to the user question, 
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
     user
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n assistant
    """,
    input_variables=["question", "document"],
)

retrieval_grader = relevance_prompt | llm | JsonOutputParser()

### Web Search Tool Initialization
# The result limit on TavilySearchResults is the `max_results` field; `k` is not a
# documented field, so passing it does not cap the number of results at 3.
web_search_tool = TavilySearchResults(max_results=3)

### Generate Answer

# Prompt Template for Answer Generation
answer_prompt = PromptTemplate(
    template="""system You are an assistant for question-answering tasks. 
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. 
    Use three sentences maximum and keep the answer concise user
    Question: {question} 
    Context: {context} 
    Answer: assistant""",
    input_variables=["question", "context"],
)

# Separate model without JSON mode: answers are free-form text.
answer_llm = ChatOllama(model=local_llm, temperature=0)
rag_chain = answer_prompt | answer_llm | StrOutputParser()

# Format Documents Function
def format_docs(docs):
    """Join the `page_content` of every item in `docs`, separated by a blank line."""
    contents = [entry.page_content for entry in docs]
    separator = "\n\n"
    return separator.join(contents)

### Answer Grader

# Asks whether the answer is useful for the question; expects JSON with one 'score' key.
answer_grading_prompt = PromptTemplate(
    input_variables=["generation", "question"],
    template="""system You are a grader assessing whether an 
    answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is 
    useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
     user Here is the answer:
    \n ------- \n
    {generation} 
    \n ------- \n
    Here is the question: {question} assistant""",
)

answer_grader = answer_grading_prompt | llm | JsonOutputParser()

### Hallucination Grader

# Asks whether the answer is supported by the supplied facts; same JSON shape.
hallucination_prompt = PromptTemplate(
    input_variables=["generation", "documents"],
    template=""" system You are a grader assessing whether 
    an answer is grounded in / supported by a set of facts. Give a binary 'yes' or 'no' score to indicate 
    whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a 
    single key 'score' and no preamble or explanation. user
    Here are the facts:
    \n ------- \n
    {documents} 
    \n ------- \n
    Here is the answer: {generation}  assistant""",
)

hallucination_grader = hallucination_prompt | llm | JsonOutputParser()

### Main Workflow Function

def _is_yes(grade):
    """Return True when a grader result is a dict whose 'score' reads 'yes' (any case)."""
    if not isinstance(grade, dict):
        return False
    return str(grade.get("score", "")).strip().lower() == "yes"


def main_workflow(question):
    """Answer `question` from the indexed documents, or fall back to a web search.

    Retrieves documents for the question and grades the first one for relevance.
    When it is graded 'yes', an answer is generated from all retrieved documents,
    then checked by the answer grader and, if that passes, by the hallucination
    grader. In every other case (nothing retrieved, a 'no' grade, or a grade that
    cannot be read) a web search is run on the question instead.

    Everything is reported with print(); the function returns None.
    """
    # Retrieve documents based on the user question
    docs = retriever.invoke(question)

    if not docs:
        # Nothing to grade, so skip the LLM call entirely.
        print("No documents were retrieved. Performing a web search...")
        relevant = False
    else:
        # Only the first returned document is graded; generation below uses all of them.
        result = retrieval_grader.invoke(
            {"question": question, "document": docs[0].page_content}
        )
        # A missing or unexpected score counts as "not relevant" rather than raising
        # KeyError or silently doing nothing.
        relevant = _is_yes(result)
        if not relevant:
            print("The retrieved document is not relevant. Performing a web search...")

    if not relevant:
        # Perform a web search based on the user's question
        search_results = web_search_tool.invoke({"query": question})
        print("Web search results:", search_results)
        return

    print("The retrieved document is relevant. Generating an answer...")
    context = format_docs(docs)
    generation = rag_chain.invoke({"context": context, "question": question})
    print("Generated Answer:")
    print(generation)

    # Check the usefulness of the generated answer
    answer_result = answer_grader.invoke({"question": question, "generation": generation})
    if not _is_yes(answer_result):
        print("Answer Grader: Fail")
        return
    print("Answer Grader: Pass")

    # Check for hallucinations (only reached when the answer grader passed)
    hallucination_result = hallucination_grader.invoke(
        {"documents": context, "generation": generation}
    )
    if _is_yes(hallucination_result):
        print("Hallucination Grader: Pass")
    else:
        print("Hallucination Grader: Fail")

### Run the Workflow

# Read the question interactively, then hand it to the workflow defined above in this cell.
question = input("Please enter your question: ")
main_workflow(question)


Please enter your question:  How many master's programs are taught in English?


The retrieved document is relevant. Generating an answer...
Generated Answer:
According to the context, there are 21 master's programs taught in English at the University of Oulu.
Answer Grader: Pass
Hallucination Grader: Pass
