In [25]:
import os

from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import FireCrawlLoader
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain_openai import OpenAIEmbeddings

# Name of the local model used by later cells.
local_llm = "llama3"

# Pages to scrape and index.
urls = [
    "https://mormonr.org/qnas/a9l1T/the_kinderhook_plates",
]

# Read the Firecrawl key from the environment so the secret never lives in the
# notebook. NOTE: a key was previously hardcoded in this cell; treat that key as
# leaked and revoke it.
firecrawl_api_key = os.environ.get("FIRECRAWL_API_KEY")
if not firecrawl_api_key:
    raise RuntimeError(
        "Set the FIRECRAWL_API_KEY environment variable before running this cell."
    )

# One list of loaded documents per URL.
docs = [
    FireCrawlLoader(api_key=firecrawl_api_key, url=url, mode="scrape").load()
    for url in urls
]

# Flatten the per-URL lists into a single list of documents.
docs_list = [item for sublist in docs for item in sublist]

# Split documents
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1024,
    chunk_overlap=20,
)

docs_splits = text_splitter.split_documents(docs_list)

# Keep only metadata entries whose values are plain scalars (str/int/float/bool)
# and rebuild each chunk as a fresh Document carrying the cleaned metadata.
filtered_docs = [
    Document(
        page_content=doc.page_content,
        metadata={
            key: value
            for key, value in doc.metadata.items()
            if isinstance(value, (str, int, float, bool))
        },
    )
    for doc in docs_splits
    if isinstance(doc, Document)
]

# Add to vectorDB
vectorstore = Chroma.from_documents(
    documents=filtered_docs,
    collection_name="rag-chroma",
    embedding=OpenAIEmbeddings(),
)

retriever = vectorstore.as_retriever()

In [26]:
## Retrieval Grader

from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

# Grader model: the local model named in `local_llm`, with format="json" so the
# reply can be handed to JsonOutputParser below.
llm = ChatOllama(temperature=0, format="json", model=local_llm)

prompt = PromptTemplate(
    template="""
    <|begin_of_text|><|start_header_id|>system<|end_header_id|> 
    You are a grader assessing relevance of a retrieved doucment to a user question. If the document contains keywords related to the user queston, grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give the binary score, 'yes' or 'no' score, to indicate whether the retrieved document is relevant to question. \n 
    Provide the binary score as a JSON with a single key 'score' and no premable or explanation. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document: {document} \n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["document", "question"],  # placeholders used in the template
)

# prompt -> model -> parsed JSON dict
retrieval_grader = prompt | llm | JsonOutputParser()
question = (
    "Does kinderhook plates prove that Joseph Smith as a false prophet or a liar?"
)
docs = retriever.invoke(question)

if docs:
    # Spot-check a single hit: the second one when available, otherwise the only
    # one, so a short result list no longer raises IndexError.
    doc_text = docs[min(1, len(docs) - 1)].page_content
    print(retrieval_grader.invoke({"question": question, "document": doc_text}))
else:
    print("Retriever returned no documents; nothing to grade.")

{'score': 'yes'}


### Generate Answer

In [27]:
# Generate
from langchain import hub
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# Answer-generation prompt with two worked examples showing the citation format.
# NOTE(review): the template uses Llama-3 style header tokens although the chain
# below runs on gpt-4o — confirm this is intended.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks.
    Use the following pieces of retrived context to answer the question. When you answer the question, it's MOST important that you back it up with specific citations or source links. If the documents use footnotes, be sure to track it down and use the information provided by the footnotes. Provide the URL if you are using a source link. Give as many realiable sources you find in the documents. If there are conflicts or inconsistency between multiple sources you found from the retrived context, choose one based on sound logic (i.e., firsthand accounts are preferred over second hands account, verified resarch with newer dates are preferred) and explain why you made the choce. See the examples below. 
    
    Example 1
    Input: 
    Did Emma Smith push Fanny Alger down the stairs and cause her to miscarry?

    Output:
    Answer: No, based on this article, [source 1] This has been confused with a story circulated about Eliza R. Snow,[source 2, 3] but that story is unlikely to be true. [source 4]
    
    Sources:
    [1] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger
    [2] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger#bio-0MvZJi
    [3] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger#footnote-marker-55  
    [4] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger/research#re-psWfCb-sF7Akb 
             
    
    Example 2
    Input:
    Were Joseph and Fanny caught having sex in a barn?

    Answer: Most unlikely. The only historical record with that detail comes from an 1872 account from William McLellin [source 1], who claimed Joseph and Fanny were "caught in the act" of being "sealed" in a barn by Emma Smith.[source 2, 3, 4] In addition to the account being thirdhand and a recollection from many decades later, McLellin had been excommunicated for apostasy, had a personal vendetta against Joseph, and was an active participant in the Missouri mobs.[source 5, 6]
    
    Sources:
    [1] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger#bio-0lnabw 
    [2] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger#bio-mgbYrb 
    [3] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger#footnote-50 
    [4] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger/research#re-jZTiDc-eUuNic 
    [5] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger#footnote-51 
    [6] https://mormonr.org/qnas/qp3yc/joseph_smith_and_fanny_alger/research#re-psWfCb-OmYWic 
    

    If you don't know the answer, just say that you don't know. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question}
    Context: {context}
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
    input_variables=["question", "context"],
)

llm = ChatOpenAI(temperature=0.5, model="gpt-4o")

# prompt -> model -> plain string
rag_chain = prompt | llm | StrOutputParser()

question = (
    "Does kinderhook plates prove that Joseph Smith as a false prophet or a liar?"
)
docs = retriever.invoke(question)
# Pass the question variable itself; the earlier version sent the literal string
# "question", so the model never saw what was actually being asked.
generation = rag_chain.invoke({"context": docs, "question": question})
print(generation)

The question of whether Joseph Smith attempted to translate the Kinderhook Plates is a matter of historical debate. 

Critics argue that Joseph Smith did try to translate the Kinderhook Plates, based on documentary evidence from the period (source: [Religious Studies Center](https://rsc.byu.edu/no-weapon-shall-prosper/did-joseph-smith-translate-kinderhook-plates)). However, Latter-day Saint researchers acknowledge that while the plates were a forgery, they dispute the claim that Joseph Smith actually translated them. Instead, they suggest that he offered only a cursory translation of one symbol and did not pursue further translation (source: [Mormonr](https://mormonr.org/qnas/a9l1T/the_kinderhook_plates)).

The historical record indicates that Joseph Smith intended to use secular methods rather than revelation for the translation, and after a preliminary examination, he did not inquire about the plates again (source: [Mormonr](https://mormonr.org/qnas/a9l1T/the_kinderhook_plates)).

Gi

In [28]:
import requests


def scrape_jina_ai(url: str, timeout: float = 30) -> str:
    """Fetch a page through the r.jina.ai endpoint and return the response text.

    Args:
        url: Address of the page to scrape; it is appended verbatim to
            ``https://r.jina.ai/``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The body of the HTTP response as text.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status, so
            an error page is never mistaken for scraped content.
    """
    # Without a timeout a stalled connection would hang the notebook cell.
    response = requests.get("https://r.jina.ai/" + url, timeout=timeout)
    response.raise_for_status()
    return response.text


# Scrape the Kinderhook plates article via scrape_jina_ai and show the result.
jina_response = scrape_jina_ai(
    url="https://mormonr.org/qnas/a9l1T/the_kinderhook_plates",
)
print(jina_response)

Title: The Kinderhook Plates

URL Source: https://mormonr.org/qnas/a9l1T/the_kinderhook_plates

Markdown Content:
What are the Kinderhook plates?

The Kinderhook plates are a set of six small (three inches in height), bell-shaped brass plates with inscriptions[\[1\]](#footnote-1) that a group of men—Wilburn Fugate,[\[BIO\]](#bio-1wdTUb) Robert Wiley,[\[BIO\]](#bio-sApvib) and Bridge Whitton[\[BIO\]](#bio-0Jgb4r)—forged;[\[2\]](#footnote-2) planted in a mound[\[3\]](#footnote-3) near Kinderhook, Illinois; and “excavated”[\[4\]](#footnote-4) in the spring of 1843. According to Fugate, the plates were fabricated as a sort of prank or joke.[\[5\]](#footnote-5)

How do we know they are fake?

One of the conspirators who participated in the hoax, Wilburn Fugate, admitted to them being fakes in 1879.[\[6\]](#footnote-6) Modern scientific tests[\[7\]](#footnote-7) also confirmed that the last-known surviving plate is of modern, not ancient, origin.[\[8\]](#footnote-8)

Was Joseph Smith tricked

In [29]:
import firecrawl
import getpass

# Prefer the FIRECRAWL_API_KEY environment variable so the notebook can run
# unattended; fall back to an interactive prompt when it is unset or empty.
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY") or getpass.getpass(
    "Mendable API Key: "
)


def scrape_firecrawl(url: str):
    """Scrape ``url`` with Firecrawl and return the "markdown" entry of the result.

    A new ``FirecrawlApp`` client is created on every call, authenticated with
    the module-level ``FIRECRAWL_API_KEY``.
    """
    client = firecrawl.FirecrawlApp(api_key=FIRECRAWL_API_KEY)
    return client.scrape_url(url)["markdown"]