In [None]:
!pip install langchain
!pip install langchain_chroma
!pip install langchain_openai



In [None]:
# Week 4: Context Router Lab

from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.prompts import PromptTemplate
import os
from google.colab import userdata


# 1. Setup
# The key is looked up through google.colab's `userdata` under the name
# "api_key" and passed to the OpenAI clients explicitly.
OPENAI_API_KEY = userdata.get("api_key")

llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4o-mini")

# 2. Create sample documents
# One short news-style sentence per document; each becomes a Document whose
# page_content is the sentence itself.
sample_texts = (
    "Apple's Q2 revenue grew by 10% driven by iPhone sales.",
    "Tesla is focusing on self-driving cars and EV batteries.",
    "Microsoft announced new AI features in Azure cloud.",
    "The Federal Reserve raised interest rates by 0.25%.",
    "Amazon is expanding its logistics network in Europe.",
    "Airbnb experiencing significant travel issues due to tariffs",
)
docs = [Document(page_content=text) for text in sample_texts]

# 3. Split documents into chunks
import hashlib  # stdlib; used only to derive stable chunk IDs below

splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
split_docs = splitter.split_documents(docs)

# 4. Store in vector DB
embeddings = OpenAIEmbeddings(api_key=OPENAI_API_KEY)

# Key every chunk by a hash of its text. Inserting without explicit IDs gives
# each run fresh IDs, so re-running this cell against the same collection
# stores another copy of every chunk and the retriever returns the same text
# k times (the recorded output of this cell shows exactly that). With
# content-derived IDs a re-run overwrites the existing entries instead.
# The dict also collapses chunks with identical text, which would otherwise
# collide on the same ID within a single insert.
chunks_by_id = {
    hashlib.sha256(chunk.page_content.encode("utf-8")).hexdigest(): chunk
    for chunk in split_docs
}

# A dedicated collection name keeps this cell's chunks separate from any
# other store created in the same session under the default name.
db = Chroma.from_documents(
    list(chunks_by_id.values()),
    embeddings,
    ids=list(chunks_by_id),
    collection_name="context_router_lab",
)

retriever = db.as_retriever(search_kwargs={"k": 3})

# 5. Routing logic: direct queries to specific categories
def route_query(query: str, retriever):
    """Retrieve context for ``query``, narrowing to finance documents on request.

    A query mentioning "revenue" or "finance" (in any letter case) is treated
    as a finance question: the retrieved documents are filtered down to those
    whose text mentions "revenue" or "interest". Any other query gets the
    retriever's results unfiltered. A one-line routing message is printed
    before retrieval in both cases.

    Args:
        query: The user's question.
        retriever: Object exposing ``invoke(query)`` that returns documents
            carrying a ``page_content`` string.

    Returns:
        The list of documents to use as context. It may be empty when the
        finance filter rejects every retrieved document.
    """
    query_keywords = ("revenue", "finance")
    finance_terms = ("revenue", "interest")

    # Compare in lower case so "Revenue" / "Finance" route the same way as
    # their lower-case spellings.
    lowered_query = query.lower()
    if not any(keyword in lowered_query for keyword in query_keywords):
        print("Routing to general retriever...")
        # `invoke` is the current retriever entry point; the older
        # `get_relevant_documents` is deprecated in LangChain.
        return retriever.invoke(query)

    print("Routing to finance-related docs...")
    # naive filter: only return docs with financial terms
    return [
        doc
        for doc in retriever.invoke(query)
        if any(term in doc.page_content.lower() for term in finance_terms)
    ]

# 6. Ask a query
query = "How are companies in the travel industry performing"
context_docs = route_query(query, retriever)

# 7. Build final prompt
template = """You are a general assistant.
Answer the question using the context below:

Context:
{context}

Question: {question}
"""
prompt = PromptTemplate.from_template(template)

# The retriever may hand back the same chunk text more than once; repeating
# it in the prompt costs tokens without adding information. dict.fromkeys
# drops the repeats while keeping the retrieval order.
context_texts = list(dict.fromkeys(d.page_content for d in context_docs))

final_prompt = prompt.format(
    context="\n".join(context_texts),
    question=query,
)

print("==== Final Prompt to LLM ====")
print(final_prompt)

# Send the fully rendered prompt to the chat model and show its text reply.
response = llm.invoke(final_prompt)
print("\nLLM Response:", response.content)

Routing to general retriever...
==== Final Prompt to LLM ====
You are a general assistant.
Answer the question using the context below:

Context:
Airbnb experiencing significant travel issues due to tariffs
Airbnb experiencing significant travel issues due to tariffs
Airbnb experiencing significant travel issues due to tariffs

Question: How are companies in the travel industry performing


LLM Response: Companies in the travel industry, including Airbnb, are currently facing significant challenges due to tariffs, which are impacting their operations and customer travel experiences. This suggests that their performance may be adversely affected as they navigate these travel issues.


In [None]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
import os

# This cell repeats its other imports so it can run on its own; `userdata`
# was the one name it still relied on an earlier cell for.
from google.colab import userdata

# The clients in this cell are constructed without an explicit api_key, so the
# key is exported through the environment (presumably where they read it from).
os.environ["OPENAI_API_KEY"] = userdata.get("api_key")

llm = ChatOpenAI(model="gpt-4o-mini")

# 1. Sample knowledge base
# Several entries mention Apple / iPhone on purpose so that retrieval and
# re-ranking have overlapping candidates to order.
knowledge_base_texts = (
    "Apple's revenue grew 10% due to iPhone sales.",
    "Apple plans to expand in India with new factories.",
    "Tesla focuses on EV production and autonomous driving.",
    "The Federal Reserve raised interest rates to fight inflation.",
    "Microsoft invests heavily in cloud and AI infrastructure.",
    "Europe is producing less iphones due to macro economic environemnt and regulation. Apple has heavy regulation",
    "Apple stock rose after strong quarterly earnings.",
)
docs = [Document(page_content=text) for text in knowledge_base_texts]

# 2. Split and embed
import hashlib  # stdlib; used only to derive stable chunk IDs below

splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=20)
split_docs = splitter.split_documents(docs)

embeddings = OpenAIEmbeddings()

# Key every chunk by a hash of its text. Inserting without explicit IDs gives
# each run fresh IDs, so re-running this cell against the same collection
# stores another copy of every chunk; the recorded retriever output for this
# cell (five identical hits) shows that effect. With content-derived IDs a
# re-run overwrites existing entries instead of appending to them.
# The dict also collapses chunks with identical text, which would otherwise
# collide on the same ID within a single insert.
chunks_by_id = {
    hashlib.sha256(chunk.page_content.encode("utf-8")).hexdigest(): chunk
    for chunk in split_docs
}

# A dedicated collection name keeps this knowledge base separate from any
# other store created in the same session under the default name.
db = Chroma.from_documents(
    list(chunks_by_id.values()),
    embeddings,
    ids=list(chunks_by_id),
    collection_name="context_pipeline_lab",
)

# -----------------------------
# PIPELINE
# -----------------------------

query = "How is iphone results going to be like"

# --- (A) Retriever: semantic search ---
# Ask the vector store for the 5 nearest chunks. `invoke` is the current
# retriever entry point; the older `get_relevant_documents` is deprecated in
# LangChain.
retriever = db.as_retriever(search_kwargs={"k": 5})
retrieved_docs = retriever.invoke(query)

print("\n[Retriever Results]")
for d in retrieved_docs:
    print("-", d.page_content)

# --- (B) Re-ranker: naive scoring by keyword overlap ---
def rerank(query, docs):
    """Order ``docs`` by how many distinct query words each one contains.

    Words are compared case-insensitively and as whole words, so a short
    query word such as "is" no longer scores a hit inside "raised". No
    stemming is applied: "iphone" does not match "iphones".

    Args:
        query: The user's question.
        docs: Iterable of documents carrying a ``page_content`` string.

    Returns:
        A new list with the same documents, highest overlap first. Documents
        with equal scores keep their incoming (retrieval) order.
    """
    import re  # local import keeps the function self-contained

    word_pattern = re.compile(r"\w+")
    # Tokenise the query once; a set means a repeated query word counts once.
    query_terms = set(word_pattern.findall(query.lower()))

    def overlap(doc):
        """Return the number of distinct query words present in ``doc``."""
        doc_terms = word_pattern.findall(doc.page_content.lower())
        return len(query_terms.intersection(doc_terms))

    # sorted() is stable even with reverse=True, so ties preserve input order.
    return sorted(docs, key=overlap, reverse=True)

# Re-order the retrieved chunks with the keyword-overlap scorer.
reranked_docs = rerank(query, retrieved_docs)

print("\n[Re-ranker Results]")
for d in reranked_docs:
    print("-", d.page_content)

# --- (C) Context Optimizer: deduplicate + trim to top 2 ---
# Walk the ranked chunks in order and keep only the first occurrence of each
# text, then join the two best distinct texts into the context string.
unique_texts = []
for ranked_doc in reranked_docs:
    if ranked_doc.page_content not in unique_texts:
        unique_texts.append(ranked_doc.page_content)

top_texts = unique_texts[:2]
final_context = "\n".join(top_texts)

print("\n[Optimized Context]")
print(final_context)

# --- (D) Final LLM Answer ---
# Render the analyst prompt from a plain template: the optimized context goes
# under "Context:" and the user's query under "Question:".
answer_template = """You are a financial analyst.
Use the context to answer the question:

Context:
{final_context}

Question: {query}
"""
prompt = answer_template.format(final_context=final_context, query=query)

# Send the rendered prompt to the chat model and print its text reply.
response = llm.invoke(prompt)
print("\n[LLM Answer]:", response.content)



[Retriever Results]
- Apple's revenue grew 10% due to iPhone sales.
- Apple's revenue grew 10% due to iPhone sales.
- Apple's revenue grew 10% due to iPhone sales.
- Apple's revenue grew 10% due to iPhone sales.
- Apple's revenue grew 10% due to iPhone sales.

[Re-ranker Results]
- Apple's revenue grew 10% due to iPhone sales.
- Apple's revenue grew 10% due to iPhone sales.
- Apple's revenue grew 10% due to iPhone sales.
- Apple's revenue grew 10% due to iPhone sales.
- Apple's revenue grew 10% due to iPhone sales.

[Optimized Context]
Apple's revenue grew 10% due to iPhone sales.

[LLM Answer]: Based on the context provided, where Apple’s revenue grew by 10% largely due to iPhone sales, it indicates that iPhone results are likely to remain strong. We could expect continued growth in iPhone sales driven by factors such as new product launches, increased demand in specific markets, or favorable consumer sentiment. However, to make a more accurate projection, it would also be important 