# LLAMA3 RAG

### Index

In [8]:
import os
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.document_loaders import FireCrawlLoader
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain.docstore.document import Document

# Read API keys and other settings from a local .env file into the environment.
load_dotenv()

# Name of the local model handed to ChatOllama by the grader chains.
local_llm = "llama3"
# compare it with gpt3.5-turbo --> local_llm = ChatOpenAI(temperature=0)

# Pages that make up the knowledge base.
urls = [
    "https://mormonr.org/qnas/a9l1T/the_kinderhook_plates",
    "https://rsc.byu.edu/no-weapon-shall-prosper/did-joseph-smith-translate-kinderhook-plates",
    "https://www.fairlatterdaysaints.org/answers/Kinderhook_Plates",
    "https://www.churchofjesuschrist.org/study/ensign/1981/08/kinderhook-plates-brought-to-joseph-smith-appear-to-be-a-nineteenth-century-hoax",
]

# Scrape every URL with FireCrawl; `docs` holds one load() result per URL.
docs = []
for url in urls:
    loader = FireCrawlLoader(
        url=url, api_key=os.getenv("FIRECRAWL_API_KEY"), mode="scrape"
    )
    docs.append(loader.load())

# Flatten the per-URL results into a single list of documents.
docs_list = []
for sublist in docs:
    docs_list.extend(sublist)

# Token-based splitter: chunks of 1024 tokens with a 30-token overlap.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=30,
    chunk_size=1024,
)

docs_splits = text_splitter.split_documents(docs_list)

# Keep only scalar metadata values (str/int/float/bool) and rebuild each chunk
# as a fresh Document before it is stored.
filtered_docs = []
for doc in docs_splits:
    # Skip anything that is not a Document carrying a 'metadata' attribute.
    if not (isinstance(doc, Document) and hasattr(doc, "metadata")):
        continue
    clean_metadata = {
        k: v
        for k, v in doc.metadata.items()
        if isinstance(v, (str, int, float, bool))
    }
    filtered_docs.append(Document(doc.page_content, metadata=clean_metadata))

# Add to vectorDB. For deployment, we want to use pinecone
vectorstore = Chroma.from_documents(
    collection_name="rag-chroma",
    embedding=OpenAIEmbeddings(),
    documents=filtered_docs,
)

# Retriever interface over the vector store, used by the cells below.
retriever = vectorstore.as_retriever()

### Web Search via Tavily

In [9]:
# Read the Tavily API key from the environment (os.getenv returns None when unset).
travily_api_key = os.getenv("TAVILY_API_KEY")

from langchain_community.tools.tavily_search import TavilySearchResults

# Web-search tool invoked by the web_search graph node.
# NOTE(review): confirm that the installed TavilySearchResults accepts the `k`
# and `api_key` keyword arguments; the result cap may be named `max_results`
# in the version in use — verify against the library documentation.
web_search_tool = TavilySearchResults(k=3, api_key=travily_api_key)

In [4]:
# Just playing with Jina AI for readable crawl, it's free! But it didn't parse footnotes, perhaps llamaparse or llamaindex would do a better job.
# import requests


# def scrape_jina_ai(url: str) -> str:
#     response = requests.get("https://r.jina.ai/" + url)
#     return response.text


# jina_response = scrape_jina_ai("https://mormonr.org/qnas/a9l1T/the_kinderhook_plates")
# print(jina_response)

### Retrieval Grader

In [10]:
from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

# Local Ollama chat model in JSON output mode, temperature 0.
llm = ChatOllama(temperature=0, format="json", model=local_llm)

# Relevance-grading prompt: asks the model for a JSON object with a single
# 'score' key ('yes' or 'no') given a {document} and a {question}.
prompt = PromptTemplate(
    template="""
    <|begin_of_text|><|start_header_id|>system<|end_header_id|> 
    You are a grader assessing relevance of a retrieved doucment to a user question. If the document contains keywords related to the user queston, grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give the binary score, 'yes' or 'no' score, to indicate whether the retrieved document is relevant to question. \n 
    Provide the binary score as a JSON with a single key 'score' and no premable or explanation. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document: {document} \n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["document", "question"],  # do I really need this?
)

# Chain: fill the prompt, query the model, parse the reply as JSON.
retrieval_grader = prompt | llm | JsonOutputParser()
question = (
    "Does kinderhook plates prove that Joseph Smith as a false prophet or a liar?"
)
# NOTE: this rebinds `docs`, which the indexing cell used for the raw scrape results.
docs = retriever.invoke(question)

# Smoke test on the second retrieved chunk.
# NOTE(review): docs[1] presumably fails if fewer than two documents come back — confirm.
doc_text = docs[1].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_text}))

{'score': 'yes'}


### Generate Answer

In [11]:
# Generate
from langchain import hub
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# Answer-generation prompt: system instructions that demand cited answers, two
# worked examples with source lists, then the {question} and {context} slots.
# NOTE(review): the template uses Llama-3 chat special tokens, yet the chain
# built from it in this cell runs on ChatOpenAI (gpt-4o) — confirm this is intended.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks.
    Use the following pieces of retrived context to answer the question. When you answer the question, it's MOST important that you back it up with specific citations or source links. If the documents use footnotes, be sure to track it down and use the information provided by the footnotes. Provide the URL if you are using a source link. Give as many realiable sources you find in the documents. If there are conflicts or inconsistency between multiple sources you found from the retrived context, choose one based on sound logic (i.e., firsthand accounts are preferred over second hands account, verified resarch with newer dates are preferred) and explain why you made the choce. See the examples below. 
    
    Example 1
    Input: 
    Did Emma Smith push Fanny Alger down the stairs and cause her to miscarry?

    Output:
    Answer: No, based on this article, [source 1] This has been confused with a story circulated about Eliza R. Snow,[source 2, 3] but that story is unlikely to be true. [source 4]
    
    Sources:
    [1] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger
    [2] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger#bio-0MvZJi
    [3] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger#footnote-marker-55  
    [4] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger/research#re-psWfCb-sF7Akb 
             
    
    Example 2
    Input:
    Were Joseph and Fanny caught having sex in a barn?

    Answer: Most unlikely. The only historical record with that detail comes from an 1872 account from William McLellin [source 1], who claimed Joseph and Fanny were "caught in the act" of being "sealed" in a barn by Emma Smith.[source 2, 3, 4] In addition to the account being thirdhand and a recollection from many decades later, McLellin had been excommunicated for apostasy, had a personal vendetta against Joseph, and was an active participant in the Missouri mobs.[source 5, 6]
    
    Sources:
    [1] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger#bio-0lnabw 
    [2] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger#bio-mgbYrb 
    [3] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger#footnote-50 
    [4] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger/research#re-jZTiDc-eUuNic 
    [5] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger#footnote-51 
    [6] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger/research#re-psWfCb-OmYWic 
    

    If you don't know the answer, just say that you don't know. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question}
    Context: {context}
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question", "context"],
)

# Answer generator: OpenAI chat model (gpt-4o, temperature 0.5).
llm = ChatOpenAI(temperature=0.5, model="gpt-4o")

# Chain: fill the answer prompt, query the model, return the reply as a string.
rag_chain = prompt | llm | StrOutputParser()

# Smoke test: retrieve context for a sample question and generate an answer.
question = (
    "Does kinderhook plates prove that Joseph Smith as a false prophet or a liar?"
)
docs = retriever.invoke(question)
# Pass the question variable itself; the literal string "question" would fill the
# prompt's {question} slot with the word "question" instead of the real query.
generation = rag_chain.invoke({"context": docs, "question": question})
print(generation)

Based on the provided context, it is clear that the Kinderhook Plates were a nineteenth-century hoax. Joseph Smith did not make a translation of the plates, and there is no evidence that he showed further interest in them after an initial examination. The plates were intended to deceive Joseph Smith into making a translation of the characters etched into them, but he did not fall for the hoax.

For more detailed information, you can refer to the following sources:

1. "The Kinderhook Plates," available on Mormonr.org: [https://mormonr.org/qnas/a9l1T/the_kinderhook_plates](https://mormonr.org/qnas/a9l1T/the_kinderhook_plates)
2. "Kinderhook Plates Brought to Joseph Smith Appear to Be a Nineteenth-Century Hoax," Ensign, August 1981: [https://www.churchofjesuschrist.org/study/eng/ensign/1981/08/kinderhook-plates-brought-to-joseph-smith-appear-to-be-a-nineteenth-century-hoax](https://www.churchofjesuschrist.org/study/eng/ensign/1981/08/kinderhook-plates-brought-to-joseph-smith-appear-to-be

### Hallucination Grader

In [25]:
# Grounding-check prompt: shows the model the {documents} and the {generation}
# and asks for a JSON object whose single 'score' key is 'yes' or 'no'.
prompt = PromptTemplate.from_template(
    """<|begin_of_text|>
<|start_header_id|>system<|end_header_id|>
You are a grader assessing whether a generated answer is grounded in / supported by a set of facts. Your response MUST be a simple JSON object with a single key 'score' and a value of either 'yes' or 'no'. Do not include any explanations, preambles, or additional information.

<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Here are the facts:
-------
{documents}
-------

Here is the generated answer:
{generation}

Is the generated answer grounded in and supported by the given facts? Respond with ONLY a JSON object in the format {{"score": "yes"}} or {{"score": "no"}}.

<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""
)

# Local Ollama chat model in JSON output mode, temperature 0.
llm = ChatOllama(temperature=0, format="json", model=local_llm)

# Chain: fill the prompt, query the model, parse the reply as JSON.
hallucination_grader = prompt | llm | JsonOutputParser()

# Smoke test against the documents and answer produced by the previous cell.
result = hallucination_grader.invoke(
    {"generation": generation, "documents": docs}
)


# prompt = PromptTemplate.from_template(
#     """<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether a generated answer is grounded in / supported by a set of facts. The response MUST give a binary score: 'yes' or 'no' to indicate whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.<|eot_id|><|start_header_id|>user<|end_header_id|>
#     Here are the facts:
#     \n-------\n
#     {documents}
#     \n-------\n
#     Here is the generated answer: {generation} <|eot_id|><|start_header_id|>assistant<|end_header_id|>
#     """,
# )

# hallucination_grader = prompt | llm | JsonOutputParser()
# hallucination_grader.invoke({"documents": docs, "generation": generation})

{'score': 5,
 'rationale': "The generated answer accurately summarizes the provided context and provides supporting evidence from credible sources. The answer clearly states that the Kinderhook Plates were a nineteenth-century hoax, citing Joseph Smith's lack of interest in the plates after an initial examination and the intention to deceive him into making a translation. The inclusion of external sources (Mormonr.org and Ensign) adds credibility to the answer and provides further information for readers who want to learn more."}

### LangGraph - Setup State & Nodes

In [13]:
from typing import List
from typing_extensions import TypedDict


# State
class GraphState(TypedDict):
    """
    Shared state handed from node to node while the graph runs.

    Attributes:
        question: the user's question
        generation: answer text produced by the LLM
        web_search: "Yes"/"No" flag telling whether a web search should be added
        documents: the documents collected so far
    """

    question: str
    generation: str
    web_search: str
    # NOTE(review): the nodes in this notebook store Document objects here,
    # not plain strings — confirm whether the annotation should be widened.
    documents: List[str]


# Nodes
def retrieve(state):
    """
    Look up documents for the current question through the retriever.

    Args:
        state (dict): The current graph state; "question" is read from it

    Returns:
        dict: "documents" holding the retriever's results, plus the unchanged "question"
    """
    print("---RETRIEVE---")
    query = state["question"]

    # Hand the question to the module-level retriever and return both keys.
    return {"documents": retriever.invoke(query), "question": query}


def grade_documents(state):
    """
    Grade every retrieved document against the question and keep the relevant ones.
    As soon as one document is graded as not relevant, the web search flag is set.

    Args:
        state (dict): The current graph state; "question" and "documents" are read

    Returns:
        dict: "documents" reduced to the relevant ones, the unchanged "question",
            and "web_search" set to "Yes" or "No"
    """

    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    question = state["question"]
    candidates = state["documents"]

    kept = []
    needs_search = "No"
    for candidate in candidates:
        verdict = retrieval_grader.invoke(
            {"question": question, "document": candidate.page_content}
        )
        # Anything other than a (case-insensitive) "yes" counts as not relevant.
        if verdict["score"].lower() != "yes":
            print("---GRADE: DOCUMENT IS NOT RELEVANT---")
            needs_search = "Yes"
            continue
        print("---GRADE: DOCUMENT IS RELEVANT---")
        kept.append(candidate)

    return {"documents": kept, "question": question, "web_search": needs_search}


def generate(state):
    """
    Produce an answer by running the RAG chain on the documents in the state.

    Args:
        state (dict): The current graph state; "question" and "documents" are read

    Returns:
        dict: the unchanged "documents" and "question", plus "generation"
            holding the output of the RAG chain
    """
    print("---GENERATE---")
    question, documents = state["question"], state["documents"]

    # The documents are passed as the prompt's context.
    answer = rag_chain.invoke({"context": documents, "question": question})
    return {"documents": documents, "question": question, "generation": answer}


def web_search(state):
    """
    Run a web search for the question and add the results to the documents.

    Args:
        state (dict): The current graph state; "question" is required and
            "documents" may be missing or None

    Returns:
        dict: "documents" extended by one Document whose content is the
            newline-joined search results, plus the unchanged "question"
    """

    print("---WEB SEARCH---")
    question = state["question"]
    # .get() so that a state without "documents" is treated like documents=None.
    documents = state.get("documents")

    # Web search
    # The tool input must be a dict keyed by "query". The earlier
    # {"query: question"} was a set literal holding a constant string, so the
    # actual question never reached the search tool.
    docs = web_search_tool.invoke({"query": question})
    # Assumes each hit is a mapping with a "content" entry — TODO confirm
    # what the tool returns when the search itself fails.
    web_results = Document(page_content="\n".join(d["content"] for d in docs))

    # Build a new list rather than appending to the list owned by the state.
    documents = [*(documents or []), web_results]
    return {
        "documents": documents,
        "question": question,
    }


# Conditional edge
def decide_to_generate(state):
    """
    Determine whether to generate an answer, or add web search first.

    Args:
        state (dict): The current graph state; only "web_search" is read

    Returns:
        str: "websearch" when state["web_search"] is "Yes", otherwise "generate"

    Raises:
        KeyError: if "web_search" is missing from the state
    """

    print("---ASSESS GRADED DOCUMENTS---")

    if state["web_search"] == "Yes":
        # grade_documents raises this flag as soon as a single document is
        # graded as not relevant, so the message must not claim that all were.
        print(
            "---DECISION: NOT ALL DOCUMENTS ARE RELEVANT TO QUESTION. INCLUDE WEB SEARCH---"
        )
        return "websearch"

    # No document was flagged, so go straight to answer generation.
    print("---DECISION: GENERATE---")
    return "generate"


def _binary_grade(score, grader_name):
    """Return a grader reply's 'score' as a lower-cased string; raise KeyError if it is absent."""
    if not isinstance(score, dict) or "score" not in score:
        raise KeyError(
            f"The 'score' key is missing in the output from the {grader_name}"
        )
    # str() guards against non-string scores (e.g. a number) coming back from the model.
    return str(score["score"]).strip().lower()


def grade_generation_v_documents_and_question(state):
    """
    Decide whether the generated answer is grounded in the documents and
    whether it addresses the question.

    Args:
        state (dict): The current graph state; "question", "documents" and
            "generation" are read

    Returns:
        str: "not supported" when the hallucination grader does not answer
            "yes"; otherwise "useful" or "not useful" depending on the second grade

    Raises:
        KeyError: if a grader reply has no 'score' key
    """
    print("---CHECK HALLUCINATIONS---")
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]

    # Print documents and generation for debugging
    print(f"Documents: {documents}")
    print(f"Generation: {generation}")

    # Invoke hallucination grader
    score = hallucination_grader.invoke(
        {"documents": documents, "generation": generation}
    )

    # Print score for debugging
    print(f"Hallucination Grader Output: {score}")

    # Check hallucination
    if _binary_grade(score, "hallucination grader") != "yes":
        print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, PLEASE RETRY---")
        return "not supported"

    print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
    # Check question-answering
    print("---GRADE GENERATION vs QUESTION---")
    # retrieval_grader's prompt declares the inputs "document" and "question";
    # the previous "documents"/"generation" keys did not match them. The answer
    # is graded as the "document" whose relevance to the question is judged.
    # NOTE(review): a dedicated answer-grading prompt would likely suit this better.
    score = retrieval_grader.invoke({"question": question, "document": generation})

    # Print score for debugging
    print(f"Retrieval Grader Output: {score}")

    if _binary_grade(score, "retrieval grader") == "yes":
        print("---DECISION: GENERATION ADDRESSES QUESTION---")
        return "useful"

    print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
    return "not useful"


# def grade_generation_v_documents_and_question(state):
#     """
#     Determine whether to generate an answer, or add web search

#     Args:
#         state (dict): The current graph state

#     Returns:
#         str: Binary decision for next node to call
#     """
#     print("---CHECK HALLUCINATIONS---")
#     question = state["question"]
#     documents = state["documents"]
#     generation = state["generation"]

#     score = hallucination_grader.invoke(
#         {"documents": documents, "generation": generation}
#     )
#     grade = score["score"]

#     # Check hallucination
#     if grade == "yes":
#         print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
#         # Check question-answering
#         print("---GRADE GENERATION vs QUESTION---")
#         score = retrieval_grader.invoke(
#             {"documents": documents, "generation": generation}
#         )
#         grade = score["score"]
#         if grade == "yes":
#             print("---DECISION: GENERATION ADDRESSES QUESTION")
#             return "useful"
#         else:
#             print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
#             return "not useful"
#     else:
#         print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, PLEASE RETRY---")
#         return "not supported"

### Build Graph

In [14]:
from langgraph.graph import END, StateGraph

# Graph whose state schema is GraphState.
workflow = StateGraph(GraphState)

# Register each node function under the name the edges refer to.
for node_name, node_fn in (
    ("websearch", web_search),
    ("retrieve", retrieve),
    ("grade_documents", grade_documents),
    ("generate", generate),
):
    workflow.add_node(node_name, node_fn)

# Every run starts with retrieval.
workflow.set_entry_point("retrieve")

# retrieve -> grade_documents, then branch on decide_to_generate's return value.
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {"websearch": "websearch", "generate": "generate"},
)

# Web search results always feed into generation.
workflow.add_edge("websearch", "generate")

# After generating, route on the grading verdict: regenerate, finish, or search.
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {"not supported": "generate", "useful": END, "not useful": "websearch"},
)

In [15]:
# Compile the workflow graph into a runnable app.
app = workflow.compile()

# Test
from pprint import pprint

# Initial state: only the question is supplied.
inputs = {"question": "Did Joseph Smith Translate the Kinderhook Plates?"}
# Each streamed item is iterated as a mapping of node name -> that node's output.
for output in app.stream(inputs):
    for key, value in output.items():
        # pprint on a str shows its quoted repr, hence the quotes in the log.
        pprint(f"Finished running: {key}:")
# `value` is whatever the last iteration left behind, i.e. the output of the
# final node that ran; this relies on that output carrying "generation".
print(value["generation"])

---RETRIEVE---
'Finished running: retrieve:'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT IS RELEVANT---
---GRADE: DOCUMENT IS RELEVANT---
---GRADE: DOCUMENT IS RELEVANT---
---GRADE: DOCUMENT IS RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
'Finished running: grade_documents:'
---GENERATE---
---CHECK HALLUCINATIONS---
Documents: [Document(page_content='On April 23, 1843, nine men unearthed human bones and six small, bell-shaped plates in Kinderhook, Illinois, situated about seventy miles south of Nauvoo. Both sides of the plates apparently contained some sort of ancient writings. This discovery was reported in the Quincy Whig and then reprinted in the Times and Seasons (May 1, 1843, 185–87). The plates, later known as the Kinderhook Plates, made their way to Nauvoo and were presented to Joseph Smith, who was reported to have said, according to the History of the Church, “I have translated a portion of them, and find they contain the history of the 

KeyError: "The 'score' key is missing in the output from the hallucination grader"