In [1]:
%%capture
!pip install langchain langchain-community langchain-groq langchain-huggingface chromadb pypdf sentence-transformers

In [1]:
import os
import requests
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import dotenv



In [2]:
# Do not store the API key in the notebook. Reuse GROQ_API_KEY when the
# environment already provides it; otherwise ask for it without echoing.
if not os.environ.get("GROQ_API_KEY"):
    from getpass import getpass

    os.environ["GROQ_API_KEY"] = getpass("Enter your GROQ_API_KEY: ")

In [3]:
# Path of the PDF to index. Defaults to the original Colab location, but can be
# overridden with the UBER_10K_PDF_PATH environment variable so the notebook
# also runs outside of /content.
pdf_file = os.environ.get("UBER_10K_PDF_PATH", "/content/uber_10k.pdf")

In [4]:
print("Processing PDF...")
loader = PyPDFLoader(pdf_file)

# Load the raw documents only. Chunking is done exactly once, by the explicitly
# configured text splitter in the next cell; load_and_split() would pre-chunk
# the text with the loader's default splitter and cause a second, redundant
# split with different parameters.
docs = loader.load()

Processing PDF...


In [5]:
# Chunking parameters handed to the recursive splitter.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Break the loaded documents into overlapping chunks for embedding.
splits = text_splitter.split_documents(docs)

In [6]:
# Embedding model used to vectorize the chunks and the queries.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [7]:
# Number of chunks returned per query.
TOP_K = 3

# Index the chunks in Chroma and expose the store as a retriever.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
)
retriever = vectorstore.as_retriever(search_kwargs=dict(k=TOP_K))

In [8]:
# Answer-generation model; temperature 0 is requested for the generation step.
llm = ChatGroq(
    model_name="llama-3.3-70b-versatile",
    temperature=0,
)

# Prompt that restricts the model to the retrieved context. The placeholders
# {context} and {question} are filled in by the chain.
template = (
    "Answer the question based ONLY on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "If you don't know the answer based on the context, say \"I don't know\".\n"
)
prompt = ChatPromptTemplate.from_template(template)

def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank line.
        An empty input yields an empty string.
    """
    page_texts = []
    for document in docs:
        page_texts.append(document.page_content)
    separator = "\n\n"
    return separator.join(page_texts)

# First stage: map the incoming question to the prompt's two inputs. The
# retriever output is flattened to text for "context"; the question itself is
# forwarded unchanged.
retrieval_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: retrieve -> fill prompt -> call the LLM -> plain string.
rag_chain = retrieval_inputs | prompt | llm | StrOutputParser()

In [9]:
def evaluate_response(question, answer, context_docs, grader=None):
    """Ask a grader LLM to score an answer for faithfulness and relevance.

    Args:
        question: The user question that was asked.
        answer: The generated answer to be graded.
        context_docs: The context shown to the grader; it is interpolated into
            the prompt as-is, so pass the formatted context text.
        grader: Optional chat model exposing ``invoke(prompt)`` whose result has
            a ``content`` attribute. When omitted, a ``ChatGroq`` client with
            the notebook's default model and temperature 0 is created, which
            matches the previous behavior. Passing one in lets callers reuse a
            client across evaluations or grade with a different model.

    Returns:
        The grader's raw text reply. The prompt asks for ``Faithfulness``,
        ``Relevance`` and ``Explanation`` lines, but the reply is not parsed or
        validated here.
    """
    eval_prompt = f"""
    You are an AI Grader. Rate the following answer on a scale of 0 to 1 for two metrics:

    1. Faithfulness: Is the answer derived PURELY from the context? (0 = Hallucination, 1 = Perfect)
    2. Relevance: Does the answer directly address the user's question? (0 = Irrelevant, 1 = Perfect)

    Context: {context_docs}
    Question: {question}
    Answer: {answer}

    Output format exactly like this:
    Faithfulness: 0.X
    Relevance: 0.X
    Explanation: Short reason.
    """

    # Only build a client when the caller did not supply one.
    if grader is None:
        grader = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0)
    return grader.invoke(eval_prompt).content

In [11]:
query = "What are the risk factors related to Uber's business?"
print(f"\nQuestion: {query}")

# Retrieve once and reuse that context for both generation and grading, so the
# grader scores the answer against exactly the text the LLM was shown (calling
# rag_chain here would run a second, separate retrieval).
retrieved_docs = retriever.invoke(query)
context_text = format_docs(retrieved_docs)

print("Generating Answer...")
answer = (prompt | llm | StrOutputParser()).invoke(
    {"context": context_text, "question": query}
)
print(f"Answer: {answer}\n")

print("Running Auto-Evaluation...")
score = evaluate_response(query, answer, context_text)
print("-" * 30)
print("EVALUATION REPORT")
print("-" * 30)
print(score)


Question: What are the risk factors related to Uber's business?
Generating Answer...
Answer: The risk factors related to Uber's business are:

1. The COVID-19 pandemic and the impact of actions to mitigate the pandemic.
2. The potential reclassification of Drivers as employees, workers, or quasi-employees instead of independent contractors.
3. The highly competitive nature of the mobility, delivery, and logistics industries, with well-established and low-cost alternatives, low barriers to entry, low switching costs, and well-capitalized competitors.

These risk factors could have an adverse effect on Uber's business, financial condition, operating results, or prospects.

Running Auto-Evaluation...
------------------------------
EVALUATION REPORT
------------------------------
Faithfulness: 1.0
Relevance: 1.0
Explanation: The answer is directly derived from the provided context, specifically the "Risk Factor Summary" section, and perfectly addresses the user's question about the risk f

In [12]:
query = "What was Uber's total revenue for the year ended December 31, 2021?"
print(f"\nQuestion: {query}")

# Retrieve once and reuse that context for both generation and grading, so the
# grader scores the answer against exactly the text the LLM was shown (calling
# rag_chain here would run a second, separate retrieval).
retrieved_docs = retriever.invoke(query)
context_text = format_docs(retrieved_docs)

print("Generating Answer...")
answer = (prompt | llm | StrOutputParser()).invoke(
    {"context": context_text, "question": query}
)
print(f"Answer: {answer}\n")

print("Running Auto-Evaluation...")
score = evaluate_response(query, answer, context_text)
print("-" * 30)
print("EVALUATION REPORT")
print("-" * 30)
print(score)


Question: What was Uber's total revenue for the year ended December 31, 2021?
Generating Answer...
Answer: Uber's total revenue for the year ended December 31, 2021 was $17,455 million.

Running Auto-Evaluation...
------------------------------
EVALUATION REPORT
------------------------------
Faithfulness: 1.0
Relevance: 1.0
Explanation: The answer is directly derived from the context, specifically from the "Financial and Operational Highlights" section, and directly addresses the user's question about Uber's total revenue for the year ended December 31, 2021.


In [13]:
query = "How did the COVID-19 pandemic impact Uber's Gross Bookings and business operations?"
print(f"\nQuestion: {query}")

# Retrieve once and reuse that context for both generation and grading, so the
# grader scores the answer against exactly the text the LLM was shown (calling
# rag_chain here would run a second, separate retrieval).
retrieved_docs = retriever.invoke(query)
context_text = format_docs(retrieved_docs)

print("Generating Answer...")
answer = (prompt | llm | StrOutputParser()).invoke(
    {"context": context_text, "question": query}
)
print(f"Answer: {answer}\n")

print("Running Auto-Evaluation...")
score = evaluate_response(query, answer, context_text)
print("-" * 30)
print("EVALUATION REPORT")
print("-" * 30)
print(score)


Question: How did the COVID-19 pandemic impact Uber's Gross Bookings and business operations?
Generating Answer...
Answer: The COVID-19 pandemic had a significant impact on Uber's business operations. Initially, it led to a decline in demand for Mobility rides due to various governmental restrictions, quarantines, and travel limitations. However, the pandemic also led to an increase in food delivery orders, resulting in a 66% growth in Delivery Gross Bookings in 2021, driven by stay-at-home orders and expansion into new markets. Mobility Gross Bookings also recovered, growing 36% in 2021, as the business rebounded from the pandemic's impacts. Overall, Uber's Gross Bookings increased by $32.5 billion in 2021, up 56% compared to 2020.

Running Auto-Evaluation...
------------------------------
EVALUATION REPORT
------------------------------
Faithfulness: 0.9
Relevance: 1.0
Explanation: The answer is derived almost entirely from the context, with minor rephrasing and summarization, makin

In [14]:
query = "What is the status of legal proceedings regarding the classification of drivers as independent contractors?"
print(f"\nQuestion: {query}")

# Retrieve once and reuse that context for both generation and grading, so the
# grader scores the answer against exactly the text the LLM was shown (calling
# rag_chain here would run a second, separate retrieval).
retrieved_docs = retriever.invoke(query)
context_text = format_docs(retrieved_docs)

print("Generating Answer...")
answer = (prompt | llm | StrOutputParser()).invoke(
    {"context": context_text, "question": query}
)
print(f"Answer: {answer}\n")

print("Running Auto-Evaluation...")
score = evaluate_response(query, answer, context_text)
print("-" * 30)
print("EVALUATION REPORT")
print("-" * 30)
print(score)


Question: What is the status of legal proceedings regarding the classification of drivers as independent contractors?
Generating Answer...
Answer: The company is involved in numerous legal proceedings globally, including putative class and collective class action lawsuits, demands for arbitration, charges and claims before administrative agencies, and investigations or audits by labor, social security, and tax authorities, that challenge the classification of drivers as independent contractors. These proceedings are ongoing in various jurisdictions, including the United States and abroad, with particular note of proceedings in California.

Running Auto-Evaluation...
------------------------------
EVALUATION REPORT
------------------------------
Faithfulness: 1.0
Relevance: 1.0
Explanation: The answer is directly derived from the context and perfectly addresses the user's question, providing a clear and concise summary of the status of legal proceedings regarding the classification of 