In [1]:
! pip install -U langchain-nomic langchain_community tiktoken langchainhub chromadb langchain langgraph tavily-python gpt4all firecrawl-py

Collecting langchain-nomic
  Downloading langchain_nomic-0.1.2-py3-none-any.whl.metadata (1.3 kB)
Collecting langchain_community
  Downloading langchain_community-0.2.6-py3-none-any.whl.metadata (2.5 kB)
Collecting tiktoken
  Downloading tiktoken-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting langchainhub
  Downloading langchainhub-0.1.20-py3-none-any.whl.metadata (659 bytes)
Collecting chromadb
  Downloading chromadb-0.5.3-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain
  Downloading langchain-0.2.6-py3-none-any.whl.metadata (7.0 kB)
Collecting langgraph
  Downloading langgraph-0.1.5-py3-none-any.whl.metadata (11 kB)
Collecting tavily-python
  Downloading tavily_python-0.3.3-py3-none-any.whl.metadata (4.4 kB)
Collecting gpt4all
  Downloading gpt4all-2.7.0-py3-none-manylinux1_x86_64.whl.metadata (4.7 kB)
Collecting firecrawl-py
  Downloading firecrawl_py-0.0.20-py3-none-any.whl.metadata (6.3 kB)
Collecting langchain-core<0.3,>=0.1.

In [2]:
import os

os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = "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"

In [3]:
local_llm = 'llama3'

In [23]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.document_loaders import FireCrawlLoader
from langchain.docstore.document import Document

urls = [
    "https://www.ai-jason.com/learning-ai/how-to-reduce-llm-cost",
    "https://www.ai-jason.com/learning-ai/gpt5-llm",
    "https://www.ai-jason.com/learning-ai/how-to-build-ai-agent-tutorial-3",
]

docs = [FireCrawlLoader(api_key="fc-2c0c01b022dc4d5594f085821f9ce90d", url=url, mode= "scrape").load() for url in urls]

# Split documents
docs_list = [item for sublist in docs for item in sublist]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)

docs_splits = text_splitter.split_documents(docs_list)

# Filter out complex metadata and ensure proper document formmating
filtered_docs = []
for doc in docs_splits:
    # Ensure the doc is an instance of Document and has a 'metadata' attribute
    if isinstance(doc, Document) and hasattr(doc, "metadata"):
        clean_metadata = {k: v for k, v in doc.metadata.items() if isinstance(v, (str, int, float, bool))}
        filtered_docs.append(Document(page_content=doc.page_content, metadata=clean_metadata))
        
# Add to vectorDB
vector_store = Chroma.from_documents(
    documents = filtered_docs,
    collection_name = "rag-chroma",
    embedding = GPT4AllEmbeddings(model_name="all-MiniLM-L6-v2.gguf2.f16.gguf", gpt4all_kwargs={'allow_download': 'True'}),
)

retriever = vector_store.as_retriever()   


## Grade Documents

In [26]:
# Retrieval Grader

from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

# LLM
llm = ChatOllama(model=local_llm, format="json", temperature= 0)

prompt = PromptTemplate(
    template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing relevance
    of a retrieved document to a user question. If the docuement contains keywords related to the user question,
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' to indicate wheter the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no premable or explanation.
    <|eot_id|><|start_header_id|>user<|end_header_id|> 
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    
    input_variables = ["document", "question"],
)

retrieval_grader = prompt | llm | JsonOutputParser()
question = "How to save llm cost?"
# question = "Is Joe Biden the president of the United States?"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"document": doc_txt, "question": question}))

{'score': 'yes'}


## Generate the answer

In [27]:
# Generate

from langchain.prompts import PromptTemplate
from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Prompt
prompt = PromptTemplate(
    template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks.
    Use the following pieces of retrived context to answer the question. If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Question: {question}
    Context: {context}
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    
    input_variables = ["question", "context"],
)

# Post-processing

def format_docs(docs):
    return "\n\n".join([doc.page_content for doc in docs])

# Chain
rag_chain = prompt | llm | StrOutputParser()

# Run
question = "How to save llm cost?"
docs = retriever.invoke(question)
generation = rag_chain.invoke({"question": question, "context": format_docs(docs)})
print(generation)

{ "I don't know" : "The context does not provide specific information on how to save LLM costs. However, it mentions that you can reduce LLM costs by carefully selecting the right models for specific tasks, optimizing agent memory, and using techniques like LLM Lingua." }


## Web Search via Tavily

In [28]:
# Search
os.environ['TAVILY_API_KEY'] = "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"

from langchain_community.tools.tavily_search import TavilySearchResults
web_search_tool = TavilySearchResults(k=3)

## Hallucination Grader

In [30]:
# LLM
llm = ChatOllama(model=local_llm, format="json", temperature= 0)

# Prompt
prompt = PromptTemplate(
    template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assesing whether an
    answer is grounded in / supported by a set of facts. Give a binary score 'yes' or 'no' to indicate whether the 
    answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a single key 'score'
    and no premable or explanation. <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here are the facts: 
    \n --------- \n
    {documents}
    \n --------- \n
    Here is the answer: {generation} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    
    input_variables = ["documents", "generation"],
)

hallucination_grader = prompt | llm | JsonOutputParser()
hallucination_grader.invoke({"documents": docs, "generation": generation})

KeyboardInterrupt: 

## Answer Grader

In [None]:
# LLM
llm = ChatOllama(model=local_llm, format="json", temperature= 0)

# Prompt
prompt = PromptTemplate(
    template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether an
    answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is useful
    to resolve the question. Provide the binary score as a JSON with a single key 'score' and no premable or explanation.
    <|eot_id|><|start_header_id|>user<|end_header_id|> Here is the answer:
    \n --------- \n
    {generation}
    \n --------- \n
    Here is the question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    
    input_variables = ["generation", "question"],
)

answer_grader = prompt | llm | JsonOutputParser()
answer_grader.invoke({"generation": generation, "question": question})

## Lang graph - Setup states and nodes 

In [None]:
from typing_extensions import TypedDict
from typing import List

# State

class GraphState(TypedDict):
    question: str
    generation: str
    web_search: str
    documents: List[str]
    
from langchain.schema import Document

# Nodes

def retrieve(state):
    print("--- Retrieve ---")
    question = state["question"]
    documents = retriever.invoke(question)
    return {"documents": documents, question: question}

def generate(state):
    print("--- Generate ---")
    question = state["question"]
    documents = state["documents"]
    generation = rag_chain.invoke({"question": question, "context" : documents})
    return {"documents": documents, "question": question, "generation": generation}

def grade_documents(state):
    print("--- Check document relevance ---")
    question = state["question"]
    documents = state["documents"]
    
    # Score each document
    filtered_docs = []
    web_search = "No"
    for doc in documents:
        score = retrieval_grader.invoke({"document": doc.page_content, "question": question})
        grade = score["score"]
        
        # Document relevance
        if grade.lower() == "yes":
            print("--- Document is relevant ---")
            filtered_docs.append(doc)
        else:
            print("--- Document is not relevant ---")
            web_search = "Yes"
            continue
    return {"documents": filtered_docs, "question": question, "web_search": web_search}

def web_search(state):
    print("--- Web search ---")
    question = state["question"]
    documents = state["documents"]
    docs = web_search_tool.invoke({"query": question})
    web_results = "\n".join([d["content"] for d in docs])
    web_results = Document(page_content=web_results)
    
    if documents is not None:
        documents.append(web_results)
    else:
        documents = [web_results]
    return {"documents": documents, "question": question}


# Conditional edge

def decide_to_generate(state):
    print("--- Assess graded documents ---")
    question = state["question"]
    web_search = state["web_search"]
    filtered_documents = state["documents"]
    
    if web_search == "Yes":
        print("--- All documents are irrelevant, include web search ---")
        return "web_search"
    
    else:
        print("--- Decision: generate ---")
        return "generate"

# Conditional edge

def grade_generation_vs_documents_and_question(state):
    print("--- Check hallucination ---")
    question = state["question"]
    generation = state["generation"]
    documents = state["documents"]
    
    score = hallucination_grader.invoke({"documents": documents, "generation": generation})
    grade = score["score"]
    
    # Check hallucination
    if grade == "yes":
        print("--- Decision: generation is grounded documents ---")
        print("--- Grade generation vs question ---")
        score = answer_grader.invoke({"generation": generation, "question": question})
        grade = score["score"]
        if grade == "yes":
            print("--- Generation is useful ---")
            return "useful"
        else:
            print("--- Generation is not useful ---")
            return "not useful"
    else:
        print("--- Decision: generation is not grounded in documents, retry ---")
        return "not supported"

from langgraph.graph import END, SateGraph
workflow = SateGraph(GraphState)

# Nodes definition
workflow.add_node("web_search", web_search)
workflow.add_node("retrieve", retrieve)
workflow.add_node("grade_documents", grade_documents)
workflow.add_node("generate", generate)

In [None]:
# Graph building

workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "generate": "generate",
        "web_search": "web_search",
    },
)

workflow.add_edge("websearch", "generate")
workflow.add_conditional_edges(
    "generate",
    grade_generation_vs_documents_and_question,
    {
        "not supported": "generate",
        "useful": END,
        "not useful": "websearch",
    },
)

## Compile

In [None]:
app = workflow.compile()

# Test

from pprint import pprint
inputs = {"question": "How to save llm cost?"}

for output in app.stream(inputs):
    for key, value in output.items():
        pprint(f"Finished running: {key}:")
print(value["generation"])