In [1]:
from langchain.chains import RetrievalQA
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.llms import OpenAI
from langchain.vectorstores import Chroma
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document
from typing import List, Dict, Any
import os
from dotenv import load_dotenv
import tiktoken
import chromadb
from pinecone import Pinecone

load_dotenv()

True

In [3]:
# LOAD EXISTING VECTOR DATABASE
import os
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
import chromadb

# 1. Define the path to the existing database
persist_dir = os.path.join(os.getcwd(), "../models/tal_chroma")
print(f"Loading ChromaDB from: {persist_dir}")

# 2. Initialize the embedding function
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)

# 3. Connect to the persistent client
client = chromadb.PersistentClient(path=persist_dir)

# 4. Load the vector store
vector_store = Chroma(
    client=client,
    collection_name="tal_collection",
    embedding_function=embeddings,
)

print("Vector store loaded successfully.")

Loading ChromaDB from: /Users/matthewlu/Downloads/podcast-RAG/notebooks/../models/tal_chroma
Vector store loaded successfully.
Vector store loaded successfully.


Loading ChromaDB from: /Users/matthewlu/Downloads/podcast-RAG/notebooks/../models/tal_chroma
Vector store loaded successfully.
Vector store loaded successfully.


  vector_store = Chroma(


In [None]:
# OPTION 2: LOAD PINECONE VECTOR DATABASE
# Run this cell INSTEAD of the ChromaDB cell above if you want to use the cloud database.

import os
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone

# 1. Initialize the embedding function
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)

# 2. Connect to Pinecone
print("Connecting to Pinecone...")
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index_name = "podcast-rag"

# 3. Load the vector store
vector_store = PineconeVectorStore(
    index_name=index_name,
    embedding=embeddings
)

print("Vector store loaded successfully from Pinecone.")

Connecting to Pinecone...
Vector store loaded successfully from Pinecone.


In [4]:
docs = vector_store.similarity_search("In episode 462, what did Ira Glass and Steve Blass talk about?", k=3)
for d in docs:
    print(d.metadata)
    print(d.page_content[:500], "...\n")


{'num_utterances': 14, 'act': 'credits', 'num_words': 219, 'roles': 'host, subject', 'speakers': 'adam davidson, alex blumberg, announcer, glen pizzolorusso, ira glass', 'chunk_utterance_start': 3456.93, 'episode': 'ep-355', 'chunk_utterance_end': 3540.00052154195, 'chunk_index': 0}
Alex Blumberg and Adam Davidson. Alex, my voice is so bad, I think maybe you should read the credits. Why don't you do it? All right, Ira. I hope you feel better. I'm going to bring Adam in here to help me to, since I've never done this before. Thanks today to Ellen Weiss at NPR who made this collaboration happen this week between the news division at NPR, where I work, All Things Considered, and This American Life. Where I work. Thanks also to Mary Ann Casavant, Anna Chai, Kevin Byers, the fant ...

{'num_utterances': 1, 'episode': 'ep-385', 'chunk_index': 0, 'chunk_utterance_end': 3464.92, 'chunk_utterance_start': 3461.47, 'num_words': 13, 'roles': 'host', 'act': 'credits', 'speakers': 'ira glass'}
I'm Ir

In [5]:
llm = ChatOpenAI(
    model_name="gpt-4o-mini",
    temperature=1,
    openai_api_key=os.getenv("OPENAI_API_KEY"),
    verbose=False,
    streaming=False

)

In [17]:
import re

def classify_intent(question: str):
    prompt = f"""Classify the user's input into one of these categories: GREETING, SPECIFIC_EPISODE_QUERY, GENERAL_KNOWLEDGE.
    
    - GREETING: Simple salutations like "hello", "hi", "good morning".
    - SPECIFIC_EPISODE_QUERY: Questions asking for specific details, stories, quotes, or content found within the episode transcripts. Examples: "What happened in episode 4?", "Stories about love", "What did Ira say about...".
    - GENERAL_KNOWLEDGE: General questions about the podcast (host, genre, history) that don't require transcript search, OR questions unrelated to the podcast. Examples: "What is This American Life?", "Who is Ira Glass?", "What is 2+2?".

    Input: {question}
    
    Respond ONLY with the category name.
    """
    return llm.invoke(prompt).content.strip().upper()

def ask_podcast_rag(question: str):
    # 0. Classify intent using LLM
    intent = classify_intent(question)

    if "GREETING" in intent:
        return (
            "Hello! I'm here to help you explore 'This American Life' transcripts. Ask me a question about an episode!",
            [],
            None,
        )

    if "GENERAL_KNOWLEDGE" in intent:
        return llm.invoke(question).content, [], None

    # 1. Try to extract an episode number for metadata filtering
    # This helps the vector store narrow down to the specific episode
    search_kwargs = {"k": 10}
    
    # Regex to find "episode <number>"
    match = re.search(r"episode\s+(\d+)", question, re.IGNORECASE)
    filter_used = None
    if match:
        ep_num = match.group(1)
        # Construct the ID format used in your metadata (e.g., "ep-462")
        filter_dict = {"episode": f"ep-{ep_num}"}
        search_kwargs["filter"] = filter_dict
        filter_used = filter_dict
    
    # 2. Retrieve documents
    docs = vector_store.similarity_search(question, **search_kwargs)
    
    # 3. Format context
    context = ""
    for i, doc in enumerate(docs):
        meta = doc.metadata
        context += f"\nDocument {i+1} (Episode {meta.get('episode')}, Act {meta.get('act')}):\n"
        context += f"Speakers: {meta.get('speakers', 'Unknown')}\n"
        context += f"Roles: {meta.get('roles', 'Unknown')}\n"
        context += f"Content: {doc.page_content}\n"
    
    # 4. Build prompt
    prompt = f"""You are a helpful assistant that answers questions strictly based on podcast transcripts.

You will be given a set of retrieved transcript excerpts as context.  
Use ONLY this context to answer the question.  
If the answer cannot be found or inferred from the context, say:  
"I do not have enough information from the provided context to answer."

Guidelines:
- Cite or reference relevant parts of the context in your reasoning.
- Do NOT invent facts, add details not in the context, or rely on prior general knowledge.
- If the question asks for something outside the given context, acknowledge the limitation.
- Provide concise, factual, and direct answers.
Context:
{context}

Question: {question}
"""

    # 5. Get answer
    response = llm.invoke(prompt).content
    
    return response, docs, filter_used

In [19]:
query = "who are the speakers in episode 22"
answer, source_docs, filter_used = ask_podcast_rag(query)

if filter_used:
    print(f"Applying metadata filter: {filter_used}")

# Clean up the response to ensure consistent formatting
final_answer = answer.strip()
if final_answer.startswith("Answer:"):
    final_answer = final_answer[len("Answer:"):].strip()

print("Answer:")
print(final_answer)
print()
print("-" * 50)

print("Context Used:")
for i, doc in enumerate(source_docs):
    print(f"Document {i+1} (Episode {doc.metadata.get('episode')}):")
    print(doc.page_content)
    print()
print("-" * 50)


Applying metadata filter: {'episode': 'ep-22'}
Answer:
The speakers in Episode 22 include Ira Glass (host), Sandra Tsing Loh (host), Eugene Loh, and the announcer.

--------------------------------------------------
Context Used:
Document 1 (Episode ep-22):
So my Mom had already talked to my older sister Randy out in California by the time she called me here in Chicago. And she explained to Randy, "It's a thing about communicating with your adult children and how to be close to your adult children." And she asked my sister what advice she would give to the group. Randy's advice was direct and to the point. She said, "Tell them to get a different leader." Adult children, supposedly adult children, and their supposedly adult parents in this addition of This American Life from WBEZ Chicago and Public Radio International. I'm Ira Glass, back for another week documenting everyday stories in these United States. Today, Act One, Me and My Mom. Act Two, Sandra Tsing Loh and her father. Act Thr

In [23]:

golden_set = [
    {
        "question": "What is the title of episode 462 of This American Life?",
        "ground_truth": "Own Worst Enemy"
    },
    {
        "question": "What is the title of episode 449 of This American Life?",
        "ground_truth": "Middle School"
    },
    {
        "question": "Which episode is titled 'In Defense of Ignorance'?",
        "ground_truth": "585"
    },
    {
        "question": "Which show is hosted by the program described as a weekly public radio program produced by WBEZ Chicago and syndicated by PRX?",
        "ground_truth": "This American Life"
    },
    {
        "question": "What is the original name of 'This American Life' when it first aired in 1995?",
        "ground_truth": "Your Radio Playhouse"
    },
    {
        "question": "Who's the host of this american life podcast",
        "ground_truth": "Ira Glass"
    },
    {
        "question": "What is the episode number of 'In Defense of Ignorance'?",
        "ground_truth": "585"
    },
    {
        "question": "Which This American Life episode archive page indicates that transcripts become available the week after broadcast?",
        "ground_truth": "FAQ page"
    },
    {
        "question": "Who is the interviewee in the prologue of Episode 1?",
        "ground_truth": "Joe Franklin"
    },
    {
        "question": "In Episode 200, which government department hired a former ad executive to run an information campaign?",
        "ground_truth": "US State Department"
    },
    {
        "question": "In Episode 500, what object does Ira Glass compare the milestone to clicking over?",
        "ground_truth": "odometer"
    },
    {"question": "Which office does Gordon Johndroe speak for in Act One of episode 227?", 
     "ground_truth": "White House Office of Homeland Security"    
    },
    
    {"question": "According to Sarah Koenig, why might the plants grow well in one area of her yard in episode 396",
     "ground_truth": "peeing"
     },
    
    {"question": "Where is This American Life produced",
     "ground_truth": "WBEZ Chicago"    
    },
    
    {"question": "In episode 575, who are the speakers?",
     "ground_truth": "Ira Glass, Sean Cole, Jose Miguel Sokoloff and Damien Cave"
     },
    
    {"question": "In episode 572, What roller coaster does Tess ride that includes a nine-story loop?",
     "ground_truth": "Revolution"},
    
    {"question": "In the prologue of episode 501, how does the open letter, supposedly from the foreign media, to every man, woman, and child in their country begin?",
     "ground_truth": "Dear South Africa, please get the F of the way"},
    {
    "question": "In the prologue of episode 542, what phone line do New Yorkers call for help with city services?",
    "ground_truth": "3-1-1"
    },
    {
    "question": "Who tells the story of the prison marriage in Act One of episode 542?",
    "ground_truth": "Christopher Rhoads"},
    {
    "question": "In Act Two of episode 542, what year did Tig Notaro and Jon Dore go on the filmed tour?",
    "ground_truth": "2013"
    },
    {
    "question": "In the prologue of episode 391, what show was Ira Glass listening to while cooking for his dog?",
    "ground_truth": "Marketplace"
    },
    {
    "question": "According to David Frum, in episode 391 which president had the worst economic performance among two-term presidents before Bush?",
    "ground_truth": "Jimmy Carter"
    },
    {
    "question": "In episode 391, what percentage did employer costs per worker rise during the Bush presidency according to David Frum?",
    "ground_truth": "25%"
    },
    {
    "question": "In episode 393, what city did the married woman in the Vows story travel to for her affair?",
    "ground_truth": "Paris."
    },
    {
    "question": "What magazine blog did Jessica Pressler write her discovery onin episode 393?",
    "ground_truth": "Daily Intel."
    }
    
    
    
    
    
]

def ask_vanilla_llm(question):
    """Asks the LLM without any retrieved context."""
    prompt = f"""You are a helpful assistant. Answer the following question to the best of your ability about the American Life Podcast.
    
    Question: {question}
    """
    return llm.invoke(prompt).content

def grade_answer(question, ground_truth, prediction):
    """
    Grades the answer based on string matching rules:
    1. Exact match (case-insensitive, stripped)
    2. Ground truth is a substring of prediction
    3. Prediction is a substring of ground truth
    4. All words in ground truth appear in prediction
    """
    gt_norm = ground_truth.lower().strip()
    pred_norm = prediction.lower().strip()
    
    # 1. Exact match
    if gt_norm == pred_norm:
        return True
        
    # 2. Ground truth is substring of prediction
    if gt_norm in pred_norm:
        return True
        
    # 3. Prediction is substring of ground truth
    if pred_norm in gt_norm:
        return True
        
    # 4. All words in ground truth appear in prediction
    gt_words = set(gt_norm.split())
    pred_words = set(pred_norm.split())
    if gt_words.issubset(pred_words):
        return True
        
    return False


# Clear any previous output before starting

# 2. Run Evaluation
results = []
print(f"Starting evaluation on {len(golden_set)} questions...")

for i, item in enumerate(golden_set):
    q = item["question"]
    gt = item["ground_truth"]
    
    print(f"\nTest {i+1}: {q}")
    
    # Test RAG
    # Note: ask_podcast_rag returns (response, docs, filter_used)
    rag_ans, _, _ = ask_podcast_rag(q)
    rag_correct = grade_answer(q, gt, rag_ans)
    print(f"  RAG: {'correct' if rag_correct else 'wrong'}")
    
    # Test Vanilla
    vanilla_ans = ask_vanilla_llm(q)
    vanilla_correct = grade_answer(q, gt, vanilla_ans)
    print(f"  Vanilla: {'correct' if vanilla_correct else 'wrong'}")
    
    results.append({
        "question": q,
        "ground_truth": gt,
        "rag_correct": rag_correct,
        "vanilla_correct": vanilla_correct,
        "rag_ans": rag_ans,
        "vanilla_ans": vanilla_ans
    })

# 3. Calculate Metrics
rag_accuracy = sum(1 for r in results if r["rag_correct"]) / len(results) * 100
vanilla_accuracy = sum(1 for r in results if r["vanilla_correct"]) / len(results) * 100

print("-" * 50)
print(f"Final Results:")
print(f"RAG Accuracy:     {rag_accuracy:.1f}%")
print(f"Vanilla Accuracy: {vanilla_accuracy:.1f}%")
print("-" * 50)

# Print failures to analyze
# print("\nAnalysis of RAG Failures:")
# for r in results:
#     if not r["rag_correct"]:
#         print(f"Q: {r['question']}")
#         print(f"Expected: {r['ground_truth']}")
#         print(f"Got: {r['rag_ans']}\n")

Starting evaluation on 25 questions...

Test 1: What is the title of episode 462 of This American Life?
  RAG: wrong
  RAG: wrong
  Vanilla: wrong

Test 2: What is the title of episode 449 of This American Life?
  Vanilla: wrong

Test 2: What is the title of episode 449 of This American Life?
  RAG: wrong
  RAG: wrong
  Vanilla: wrong

Test 3: Which episode is titled 'In Defense of Ignorance'?
  Vanilla: wrong

Test 3: Which episode is titled 'In Defense of Ignorance'?
  RAG: correct
  RAG: correct
  Vanilla: wrong

Test 4: Which show is hosted by the program described as a weekly public radio program produced by WBEZ Chicago and syndicated by PRX?
  Vanilla: wrong

Test 4: Which show is hosted by the program described as a weekly public radio program produced by WBEZ Chicago and syndicated by PRX?
  RAG: correct
  RAG: correct
  Vanilla: correct

Test 5: What is the original name of 'This American Life' when it first aired in 1995?
  Vanilla: correct

Test 5: What is the original name