In [2]:
%pip install -qU langchain-pinecone pinecone-notebooks
%pip install --upgrade --quiet langchain-text-splitters tiktoken
%pip install langchain-openai
%pip install datasets

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


### Connect to OpenAI

### Retrieve Transcripts Dataframe

### Setup and load environments

In [4]:
# 1. Setup
import pandas as pd
from langchain.embeddings import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore  # ✅ NEW correct import
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv
import os
from getpass import getpass

# Load environment variables
_ = load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY") or getpass("Enter your Pinecone API key: ")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") or getpass("Enter your OpenAI API key: ")


### Chunk data

In [5]:
import math
from langchain.schema import Document

# 1. Load the entire dataframe (no head())
df = pd.read_pickle("/Users/test/Desktop/ironhack_labs/YouTube_ChatBot_Final/datasets/dataframe.pkl")
print(f"✅ Loaded dataframe with {len(df)} rows")
print(df.head(2))  # preview first 2 rows

# 2. Prepare documents and sources
texts = df["text"].tolist()
sources = df["source_file"].tolist()
print(f"✅ Prepared {len(texts)} texts and sources")

# 3. Split texts into chunks with metadata
splitter = CharacterTextSplitter(chunk_size=400, chunk_overlap=200)
documents = [Document(page_content=t, metadata={"source_file": s}) for t, s in zip(texts, sources)]
print(f"✅ Created {len(documents)} Document objects")

split_docs = splitter.split_documents(documents)
print(f"✅ After splitting, got {len(split_docs)} document chunks")
print(f"Sample chunk content:\n{split_docs[0].page_content}")
print(f"Sample chunk metadata:\n{split_docs[0].metadata}")



✅ Loaded dataframe with 77214 rows
                                       text  \
0  so planets become more interesting moons   
1       become places to go and revisit but   

                                         source_file  
0  40 - Neil deGrasse Tyson and Bill Nye Catch Up...  
1  40 - Neil deGrasse Tyson and Bill Nye Catch Up...  
✅ Prepared 77214 texts and sources
✅ Created 77214 Document objects
✅ After splitting, got 77214 document chunks
Sample chunk content:
so planets become more interesting moons
Sample chunk metadata:
{'source_file': '40 - Neil deGrasse Tyson and Bill Nye Catch Up.en.txt'}


### Create Pinecone index, embeddings and vectors

In [43]:
# 4. Initialize Pinecone client and index
index_name = "youtube-transcripts"
dimension = 1536

pc = Pinecone(api_key=PINECONE_API_KEY)

if index_name not in [index["name"] for index in pc.list_indexes()]:
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )
    print(f"✅ Created index: {index_name}")
else:
    print(f"✅ Index '{index_name}' already exists.")

index = pc.Index(index_name)
print(f"✅ Connected to Pinecone index: {index_name}")

# 5. Initialize OpenAI embeddings
embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
print("✅ Initialized OpenAI embeddings")

# 6. Create PineconeVectorStore instance
vectordb = PineconeVectorStore(index, embedding)

# 7. Upload in smaller batches manually to avoid request size limit errors
from tqdm import tqdm
import math
batch_size = 30  # Adjust batch size if needed to avoid errors
num_batches = math.ceil(len(split_docs) / batch_size)

print(f"Total document chunks: {len(split_docs)}")
print(f"Uploading in batches of {batch_size}, total batches: {num_batches}")

for i in tqdm(range(num_batches), desc="Uploading batches"):
    batch_docs = split_docs[i * batch_size : (i + 1) * batch_size]
    vectordb.add_documents(batch_docs)

print(f"✅ Successfully stored all {len(split_docs)} document chunks in Pinecone")


✅ Index 'youtube-transcripts' already exists.
✅ Connected to Pinecone index: youtube-transcripts
✅ Initialized OpenAI embeddings
Total document chunks: 77214
Uploading in batches of 30, total batches: 2574


Uploading batches: 100%|██████████| 2574/2574 [2:06:04<00:00,  2.94s/it]  

✅ Successfully stored all 77214 document chunks in Pinecone





### Get data from pinecone after storing successfully

In [16]:
def reconnect_vectorstore(index_name="youtube-transcripts"):
    from langchain.embeddings import OpenAIEmbeddings
    from langchain_pinecone import PineconeVectorStore
    from pinecone import Pinecone
    from dotenv import load_dotenv
    import os

    load_dotenv()
    PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

    embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
    pc = Pinecone(api_key=PINECONE_API_KEY)
    index = pc.Index(index_name)
    vectordb = PineconeVectorStore(index, embedding)
    retriever = vectordb.as_retriever(search_kwargs={"k": 5})
    print("✅ Reconnected to Pinecone vectorstore and retriever is ready")
    return vectordb, retriever


### Set up retriever

In [17]:
# 4. Set up retriever
vectordb, retriever = reconnect_vectorstore()
print("✅ Retriever initialized")

# 5. Memory & Tools
from langchain.memory import ConversationBufferMemory
from langchain.agents import initialize_agent, AgentType, Tool
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

llm = ChatOpenAI(temperature=0, model="gpt-4", openai_api_key=OPENAI_API_KEY)
print("✅ Initialized LLM")

# Define RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)
print("✅ RetrievalQA chain created")


✅ Reconnected to Pinecone vectorstore and retriever is ready
✅ Retriever initialized
✅ Initialized LLM
✅ RetrievalQA chain created


### Define function for answering questions with sources

In [9]:

def answer_with_sources(input_text: str):
    print(f"\n📝 Query: {input_text}")
    result = qa_chain({"query": input_text})
    answer = result["result"]
    sources = list(set(doc.metadata.get("source_file", "Unknown") for doc in result["source_documents"]))
    print(f"🗒️ Retrieved {len(sources)} source documents")
    return f"{answer}\n\nSources:\n" + "\n".join(sources)

### Setup tools and agent with Langsmith tracing

In [10]:
from langsmith import traceable
import os
# !pip install -U langsmith openai

# Set LangSmith env vars
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "LANGCHAIN_API_KEY"
os.environ["LANGCHAIN_PROJECT"] = "youtube-rag"

from langchain.agents import initialize_agent

# Define tool
tools = [
    Tool(
        name="YouTubeTranscriptQA",
        func=answer_with_sources,
        description="Useful for answering questions about YouTube video transcripts. Input should be a fully formed question."
    )
]
print("✅ Tools defined")

# Conversation memory
memory = ConversationBufferMemory(memory_key="chat_history")
print("✅ Conversation memory initialized")

# 6. Initialize Agent with tools and memory
agent = initialize_agent(
    agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
    tools=tools,
    llm=llm,
    verbose=True,
    memory=memory,
    max_iterations=3
)


print("✅ Agent initialized and ready")


@traceable(name="YouTube RAG Trace")
def run_agent():
    agent.run("Who was Albert Einstein?")

result = run_agent()

✅ Tools defined
✅ Conversation memory initialized
✅ Agent initialized and ready


[1m> Entering new AgentExecutor chain...[0m


  memory = ConversationBufferMemory(memory_key="chat_history")
  agent = initialize_agent(
  agent.run("Who was Albert Einstein?")


[32;1m[1;3mThought: Do I need to use a tool? No
AI: Albert Einstein was a renowned physicist who is best known for his theory of relativity, which revolutionized the field of theoretical physics. He was born in Germany in 1879 and showed an early aptitude for mathematics and physics. His work on the photoelectric effect won him the Nobel Prize in Physics in 1921. Einstein's most famous equation, E=mc^2, demonstrates the equivalence of energy (E) and mass (m), with "c" being the speed of light in a vacuum. This equation has been foundational to the development of nuclear energy. Einstein emigrated to the United States in the 1930s due to the rise of the Nazi regime in Germany. He accepted a position at the Institute for Advanced Study in Princeton, New Jersey, where he remained until his retirement. He passed away in 1955.[0m

[1m> Finished chain.[0m


### Prompt agent

In [None]:
prompt_templates = {
    "summary": "Please provide a concise summary of the following topic: '{}'",
    "source": "Please provide the video source for the following topic: '{}'",
    "explanation": "Explain in detail: '{}'",
    "compare": "Compare and contrast these two concepts: '{}' and '{}'",
    "timeline": "Give me a timeline of events related to: '{}'",
    "faq": "What are the most frequently asked questions about '{}', and their answers?",
    "key_points": "List the key points covered in: '{}'",
    "step_by_step": "Provide a step-by-step guide on how to: '{}'",
    "examples": "Give me examples related to '{}'",
    "pros_cons": "What are the pros and cons of '{}'",
    "common_mistakes": "What are the common mistakes people make regarding '{}', and how to avoid them?",
    
}

def ask_agent(agent, prompt_type, *args):
    if prompt_type not in prompt_templates:
        raise ValueError(f"Prompt type '{prompt_type}' not supported.")
    prompt = prompt_templates[prompt_type].format(*args)
    print(f"Prompt to model:\n{prompt}\n")
    response = agent.run(prompt)
    return response



In [12]:
# user_question = input("Who is Bill Nye?")
# result = ask_agent_freeform(agent, user_question)
# print(result)

result = ask_agent(agent, "compare", "string theory", "pinpoint theory")
print(result)

Prompt to model:
Compare and contrast these two concepts: 'string theory' and 'pinpoint theory'



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? No
AI: It seems there might be a bit of confusion in your question. While 'string theory' is a well-known concept in theoretical physics, 'pinpoint theory' is not a recognized term in the field. 

String theory is a theoretical framework in which the point-like particles of particle physics are replaced by one-dimensional objects called strings. It describes how these strings propagate through space and interact with each other. The theory has the potential to provide a unified description of gravity and particle physics, as it incorporates quantum mechanics and general relativity.

On the other hand, 'pinpoint theory' doesn't appear to be a standard term in physics. If you're referring to a specific concept, could you please provide more context or details?[0m

[1m> Finished chain.[0m
It seems

In [52]:
result = ask_agent(agent, "explanation", "Black holes")
print(result)

Prompt to model:
Explain in detail: 'Black holes'



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? No
AI: A black hole is a region of spacetime where gravity is so strong that nothing, not even particles or electromagnetic radiation such as light, can escape from it. The theory of general relativity predicts that a sufficiently compact mass can deform spacetime to form a black hole.

Black holes are characterized by the "event horizon," a boundary in spacetime through which matter and light can only pass inward towards the mass of the black hole. Nothing, not even light, can escape from inside the event horizon. The size of the event horizon is proportional to the mass of the black hole.

There are three types of black holes: stellar black holes, supermassive black holes, and intermediate black holes. Stellar black holes are formed when a massive star collapses under its own gravity. Supermassive black holes, which are millions to billions

In [53]:
result = ask_agent(agent, "source", "Black holes")
print(result)

Prompt to model:
Please provide the video source for the following topic: 'Black holes'



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: YouTubeTranscriptQA
Action Input: Please provide the video source for the following topic: 'Black holes'[0m
📝 Query: Please provide the video source for the following topic: 'Black holes'
🗒️ Retrieved 5 source documents

Observation: [36;1m[1;3mI'm sorry, but as a text-based AI, I don't have the ability to provide video sources.

Sources:
52 - The Sounds of Space ｜ StarTalk Live! at Guild Hall.en.txt
55 - Answering Fan Queries About Strange Matter, the Insides of Black Holes & More.en.txt
17 - When Black Holes Collide, with Nergis Mavalvala.en.txt
57 - Scientists Discuss NASA’s Strategy For Finding Alien Life in the Universe.en.txt
25 - Neil & Malcolm Gladwell Explore What Shapes Our Society.en.txt[0m
Thought:[32;1m[1;3mDo I need to use a tool? No
AI: Based on the information from the vi

In [49]:
result = ask_agent(agent, "source", "string theory")
print(result)

Prompt to model:
Please provide the video source for the following topic: 'string theory'



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: YouTubeTranscriptQA
Action Input: 'string theory'[0m
📝 Query: 'string theory'
🗒️ Retrieved 1 source documents

Observation: [36;1m[1;3mString theory is a theoretical framework in which the point-like particles of particle physics are replaced by one-dimensional objects called strings. It describes how these strings propagate through space and interact with each other. The idea behind string theory is to reconcile quantum physics and general relativity, and it has been proposed as a theory of everything. It's a complex and highly mathematical theory that has yet to be fully proven or disproven.

Sources:
01 - Is The Universe Made of Tiny Vibrating Strings？ With Lara Anderson.en.txt[0m
Thought:[32;1m[1;3mDo I need to use a tool? No
AI: Based on the information from the video transcript, 

### Evaluate with ROUGE

In [70]:
import evaluate
from nltk.tokenize import sent_tokenize
import nltk

# Ensure required tokenizer
nltk.download('punkt')

# Load ROUGE metric
rouge = evaluate.load("rouge")

def compute_rouge(generated_text, reference_text):
    """
    Compute ROUGE score between a reference answer and a generated answer.
    """
    # Ensure text is split into sentences with newlines
    generated = "\n".join(sent_tokenize(generated_text.strip()))
    reference = "\n".join(sent_tokenize(reference_text.strip()))

    result = rouge.compute(
        predictions=[generated],
        references=[reference],
        use_stemmer=True,
    )
    return result


[nltk_data] Downloading package punkt to /Users/test/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Downloading builder script: 0.00B [00:00, ?B/s]

In [71]:
def answer_with_rouge(input_text: str, reference_answer: str = None):
    print(f"\n📝 Query: {input_text}")
    result = qa_chain({"query": input_text})
    answer = result["result"]
    sources = list(set(doc.metadata.get("source_file", "Unknown") for doc in result["source_documents"]))
    print(f"🗒️ Retrieved {len(sources)} source documents")
    
    output = f"{answer}\n\nSources:\n" + "\n".join(sources)
    
    if reference_answer:
        rouge_scores = compute_rouge(answer, reference_answer)
        print(f"🔴 ROUGE scores:")
        for k, v in rouge_scores.items():
            print(f"  {k.upper()}: {v:.4f}")
        output += "\n\nROUGE scores:\n" + "\n".join(f"{k.upper()}: {v:.4f}" for k, v in rouge_scores.items())
        
    return output


In [72]:
query = "What is at the center of a black hole?"

reference = "The singularity at the center of a black hole is the ultimate no man's land: a place where matter is compressed down to an infinitely tiny point, and all conceptions of time and space completely break down"
response = answer_with_rouge(query, reference)
print(response)


📝 Query: What is at the center of a black hole?
🗒️ Retrieved 5 source documents
🔴 ROUGE scores:
  ROUGE1: 0.4658
  ROUGE2: 0.1690
  ROUGEL: 0.3288
  ROUGELSUM: 0.3014
The center of a black hole is thought to contain a gravitational singularity, a point where the gravitational field becomes infinite. However, our understanding of black holes is still limited and this is largely theoretical.

Sources:
06 - Tackling the Biggest Unsolved Problems in Math with 3Blue1Brown.en.txt
52 - The Sounds of Space ｜ StarTalk Live! at Guild Hall.en.txt
41 - The Science of Interstellar with Science Advisor, Kip Thorne.en.txt
31 - Neil and a Particle Physicist Discuss Why There’s Something Instead of Nothing.en.txt
12 - Unpacking Einstein’s Greatest Papers, with Janna Levin.en.txt

ROUGE scores:
ROUGE1: 0.4658
ROUGE2: 0.1690
ROUGEL: 0.3288
ROUGELSUM: 0.3014


In [73]:
query = "What is Bill Nye Famous for?"

reference = "He is best known as the host of the science education television show Bill Nye the Science Guy (1993–1999) and as a science educator in pop culture"
response = answer_with_rouge(query, reference)
print(response)


📝 Query: What is Bill Nye Famous for?
🗒️ Retrieved 1 source documents
🔴 ROUGE scores:
  ROUGE1: 0.5000
  ROUGE2: 0.3478
  ROUGEL: 0.5000
  ROUGELSUM: 0.5000
Bill Nye is famous for his role as the host of the educational TV program "Bill Nye the Science Guy."

Sources:
40 - Neil deGrasse Tyson and Bill Nye Catch Up.en.txt

ROUGE scores:
ROUGE1: 0.5000
ROUGE2: 0.3478
ROUGEL: 0.5000
ROUGELSUM: 0.5000


In [77]:
query = "Who is Sara Imari Walker?"

reference = "Sara Imari Walker is an American theoretical physicist and astrobiologist with research interests in the origins of life, astrobiology, physics of life, emergence, complex and dynamical systems, and artificial life"
response = answer_with_rouge(query, reference)
print(response)


📝 Query: Who is Sara Imari Walker?
🗒️ Retrieved 2 source documents
🔴 ROUGE scores:
  ROUGE1: 0.3333
  ROUGE2: 0.2353
  ROUGEL: 0.3333
  ROUGELSUM: 0.3333
Sara Imari Walker is an astrobiologist.

Sources:
50 - Neil & Sara Imari Walker Discuss New Theories on The Origins of Life in the Universe.en.txt
06 - Tackling the Biggest Unsolved Problems in Math with 3Blue1Brown.en.txt

ROUGE scores:
ROUGE1: 0.3333
ROUGE2: 0.2353
ROUGEL: 0.3333
ROUGELSUM: 0.3333


In [78]:
query = "What is Sara Imari Walker known for?"

reference = "Sara Imari Walker is recognized for developing assembly theory, an informational framework for identifying life based on its complexity. "
response = answer_with_rouge(query, reference)
print(response)


📝 Query: What is Sara Imari Walker known for?
🗒️ Retrieved 3 source documents
🔴 ROUGE scores:
  ROUGE1: 0.3243
  ROUGE2: 0.1143
  ROUGEL: 0.2162
  ROUGELSUM: 0.2162
I'm sorry, but the text doesn't provide information on what Sarah Imari Walker is known for.

Sources:
50 - Neil & Sara Imari Walker Discuss New Theories on The Origins of Life in the Universe.en.txt
06 - Tackling the Biggest Unsolved Problems in Math with 3Blue1Brown.en.txt
33 - The Elements of Marie Curie with Dava Sobel.en.txt

ROUGE scores:
ROUGE1: 0.3243
ROUGE2: 0.1143
ROUGEL: 0.2162
ROUGELSUM: 0.2162


In [None]:
#voice input
# !pip install openai-whisper pyaudio pyttsx3
# !pip install sounddevice wavio



### Creates user audio recording functionality

In [13]:
import whisper
import sounddevice as sd
import numpy as np
import wavio
from gtts import gTTS
from playsound import playsound
import os
import uuid
import openai


import openai
import os

openai.api_key = os.getenv("OPENAI_API_KEY")


# Load Whisper model
model = whisper.load_model("base")

def record_audio(duration=5, fs=16000):
    print("🎙️ Recording...")
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
    sd.wait()
    wavio.write("user_input.wav", recording, fs, sampwidth=2)
    print("🎙️ Recording finished")

def transcribe_audio(filepath="user_input.wav"):
    print("📝 Transcribing...")
    result = model.transcribe(filepath)
    text = result["text"].strip()
    print(f"🗣️ You said: {text}")
    return text

def speak_text(text):
    filename = f"{uuid.uuid4().hex}.mp3"
    tts = gTTS(text=text, lang='en')
    tts.save(filename)
    playsound(filename)
    os.remove(filename)

def shorten_text(text, max_ratio=0.5):
    # Prompt the agent or LLM to summarize or shorten the text
    prompt = f"Please shorten the following text to about {int(max_ratio*100)}% of its length, keeping the key information:\n\n{text}"
    shortened = agent.run(prompt)
    return shortened

def answer_with_sources(question):
    full_answer = agent.run(question)
    short_answer = shorten_text(full_answer)
    return short_answer

# One interaction
def voice_rag_interaction():
    record_audio()
    question = transcribe_audio()
    answer = answer_with_sources(question)
    print(f"🤖 Answer:\n{answer}")
    speak_text(answer)


In [92]:
while True:
    voice_rag_interaction()
    cont = input("Ask another question? (y/n): ")
    if cont.strip().lower() != "y":
        break


🎙️ Recording...
🎙️ Recording finished
📝 Transcribing...




🗣️ You said: Who is Albert Einstein?


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAI: Albert Einstein was a renowned physicist who is best known for his theory of relativity, one of the two pillars of modern physics (the other being quantum mechanics). He was born in Germany in 1879 and later became a Swiss citizen. His work revolutionized our understanding of the basic laws of the universe.

Einstein's most famous equation, E=mc^2, demonstrates the equivalence of energy (E) and mass (m), with "c" being the speed of light in a vacuum. This equation has profound implications for the understanding of energy and matter.

In 1921, Einstein was awarded the Nobel Prize in Physics for his explanation of the photoelectric effect, which demonstrated the particle-like properties of light. This work was a foundational part of the development of quantum mechanics.

Einstein was also known for his views on philosophy and non-violence. He emigrated to the United States in 1933, escap



🗣️ You said: What is the source video for Albert?


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: YouTubeTranscriptQA
Action Input: What is the source video for Albert Einstein?[0m
📝 Query: What is the source video for Albert Einstein?
🗒️ Retrieved 4 source documents

Observation: [36;1m[1;3mI'm sorry, but the information provided doesn't specify a source video for Albert Einstein.

Sources:
33 - The Elements of Marie Curie with Dava Sobel.en.txt
04 - Celebrating 100 Years of Quantum Physics with Brian Greene, Hasan Minhaj & Janna Levin.en.txt
17 - When Black Holes Collide, with Nergis Mavalvala.en.txt
12 - Unpacking Einstein’s Greatest Papers, with Janna Levin.en.txt[0m
Thought:[32;1m[1;3mDo I need to use a tool? No
AI: Based on the information available, there are several videos that mention Albert Einstein. Here are a few:

1. "The Elements of Marie Curie with Dava Sobel"
2. "Celebrating 100 Years of Quantum Physics wi

In [19]:
import gradio as gr
import whisper
import uuid
import os
from gtts import gTTS

# Load Whisper model once
model = whisper.load_model("base")

# Your agent setup code must be above this (agent, answer_with_sources, etc.)

def transcribe_audio_file(audio_file):
    print("📝 Transcribing audio...")
    result = model.transcribe(audio_file.name)
    text = result["text"].strip()
    print(f"🗣️ Transcribed text: {text}")
    return text

def generate_tts(text):
    filename = f"{uuid.uuid4().hex}.mp3"
    tts = gTTS(text=text, lang='en')
    tts.save(filename)
    return filename

def gradio_qa(text_input, audio_input):
    # Decide input text
    if text_input and text_input.strip():
        question = text_input.strip()
        print("Using text input from user")
    elif audio_input is not None:
        question = transcribe_audio_file(audio_input)
        print("Using transcribed audio input")
    else:
        return "Please enter a question or upload an audio.", "", None

    # Generate answer from your agent
    answer = answer_with_sources(question)

    # Generate TTS audio file for answer
    audio_path = generate_tts(answer)

    return question, answer, audio_path

iface = gr.Interface(
    fn=gradio_qa,
    inputs=[
        gr.Textbox(label="Enter your question (text)", lines=2, placeholder="Type your question here..."),
        gr.Audio(type="filepath", label="Or upload your question as audio")
    ],
    outputs=[
        gr.Textbox(label="Processed Question"),
        gr.Textbox(label="Answer"),
        gr.Audio(label="Answer Audio", type="filepath")
    ],
    title="YouTube Transcript QA (Text & Voice Input)",
    description="Ask questions via text or audio and get text + spoken answers from the YouTube transcript-based agent.",
    allow_flagging="never"
)

if __name__ == "__main__":
    iface.launch(share=True)




* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://512e510ae7d2c76974.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Using text input from user


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? No
AI: A black hole is a region of space where gravity is so strong that nothing, not even light, can escape from it. They are formed when a massive star collapses under its own gravity after exhausting its nuclear fuel. This collapse results in a singularity, a point of infinite density, surrounded by an event horizon, which is the boundary beyond which nothing can escape.

Black holes are predicted by Einstein's theory of general relativity. Although we can't directly observe black holes due to their strong gravity preventing light from escaping, we can infer their presence by their effects on nearby matter. For example, if a black hole passes through a cloud of interstellar matter, it will draw matter inward in a process known as accretion. This accreted matter can form an accretion disk around the black hole, which can emit X-rays and other forms of energy that 

Traceback (most recent call last):
  File "/opt/anaconda3/envs/cleanenv/lib/python3.10/site-packages/gradio/queueing.py", line 626, in process_events
    response = await route_utils.call_process_api(
  File "/opt/anaconda3/envs/cleanenv/lib/python3.10/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
  File "/opt/anaconda3/envs/cleanenv/lib/python3.10/site-packages/gradio/blocks.py", line 2220, in process_api
    result = await self.call_function(
  File "/opt/anaconda3/envs/cleanenv/lib/python3.10/site-packages/gradio/blocks.py", line 1731, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
  File "/opt/anaconda3/envs/cleanenv/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/opt/anaconda3/envs/cleanenv/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2505, in run_sync_in_worker_t

Using text input from user


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAI: Bill Nye, often known as Bill Nye the Science Guy, is an American science communicator, television presenter, and mechanical engineer. He is best known for his television show "Bill Nye the Science Guy," a children's show that aired in the 1990s and aimed to teach science in a fun and engaging way. Nye has since become a popular figure in the U.S. for his efforts to promote science education and combat climate change. He has written several books on science and currently serves as the CEO of The Planetary Society, an organization dedicated to advancing space science and exploration.[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAI: Bill Nye, also known as "the Science Guy," is an American science communicator and TV presenter. He's famous for his 1990s children's show teaching science in an engaging way. Nye is recognized for promoting science educat

Traceback (most recent call last):
  File "/opt/anaconda3/envs/cleanenv/lib/python3.10/site-packages/gradio/queueing.py", line 626, in process_events
    response = await route_utils.call_process_api(
  File "/opt/anaconda3/envs/cleanenv/lib/python3.10/site-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
  File "/opt/anaconda3/envs/cleanenv/lib/python3.10/site-packages/gradio/blocks.py", line 2220, in process_api
    result = await self.call_function(
  File "/opt/anaconda3/envs/cleanenv/lib/python3.10/site-packages/gradio/blocks.py", line 1731, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
  File "/opt/anaconda3/envs/cleanenv/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/opt/anaconda3/envs/cleanenv/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2505, in run_sync_in_worker_t

Using text input from user


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAI: Black holes are regions in space with extreme gravity that captures everything, including light. They form from star collapse, resulting in a singularity and an event horizon. Their effects on nearby matter indicate their presence. They can be a few times the sun's mass or supermassive, typically found in galaxy centers.[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAI: Black holes, with extreme gravity, capture all, including light. Formed from star collapse, they have a singularity and event horizon. Their influence on nearby matter reveals their existence. Sizes range from a few times the sun's mass to supermassive, usually in galaxy centers.[0m

[1m> Finished chain.[0m
Using text input from user


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAI: Black holes are regions in space with intense gravity that captures everything, inclu