### Set up and load environment variables

In [15]:
# 1. Setup
import pandas as pd
from langchain.embeddings import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore  # ✅ NEW correct import
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv
import os
from getpass import getpass

# Read a local .env file (when one exists) so its entries are visible through os.environ.
_ = load_dotenv()

# For each service, use the key from the environment when it is set and non-empty;
# otherwise prompt for it so the secret never has to be written into the notebook.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY") or getpass("Enter your Pinecone API key: ")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass("Enter your OpenAI API key: ")


### Reconnect to the Pinecone index after the data has been stored

In [None]:
def reconnect_vectorstore(index_name="youtube-transcripts", k=5, probe_query="What is this video about?"):
    """Reconnect to an existing Pinecone index and build a retriever over it.

    Args:
        index_name: Name of the Pinecone index to open.
        k: Number of documents the retriever returns per query.
        probe_query: Query used for a quick retrieval check whose hits are
            printed. Pass ``None`` or ``""`` to skip the check.

    Returns:
        A ``(vectordb, retriever)`` tuple.

    Raises:
        ValueError: If the Pinecone or OpenAI API key cannot be found.
    """
    # Imports are kept local so the function also works when this cell is run on its own.
    from langchain.embeddings import OpenAIEmbeddings
    from langchain_pinecone import PineconeVectorStore
    from pinecone import Pinecone
    from dotenv import load_dotenv
    import os

    load_dotenv()
    # Prefer the environment, but fall back to keys already held at notebook
    # level (e.g. typed in through getpass in the setup cell); those never
    # reach os.environ, so os.getenv alone would yield None for them.
    pinecone_api_key = os.getenv("PINECONE_API_KEY") or globals().get("PINECONE_API_KEY")
    openai_api_key = os.getenv("OPENAI_API_KEY") or globals().get("OPENAI_API_KEY")
    missing = [
        name
        for name, value in (
            ("PINECONE_API_KEY", pinecone_api_key),
            ("OPENAI_API_KEY", openai_api_key),
        )
        if not value
    ]
    if missing:
        raise ValueError(
            "Missing API key(s): " + ", ".join(missing)
            + ". Set them in the environment/.env file or run the setup cell first."
        )

    embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)
    pc = Pinecone(api_key=pinecone_api_key)
    index = pc.Index(index_name)

    # 🔍 Print vector count directly
    stats = index.describe_index_stats()
    print(f"📦 Index contains {stats['total_vector_count']} vectors")

    vectordb = PineconeVectorStore(index, embedding)
    retriever = vectordb.as_retriever(search_kwargs={"k": k})

    # Optional end-to-end check: run one query and show what comes back.
    if probe_query:
        docs = retriever.get_relevant_documents(probe_query)
        print(f"Retrieved {len(docs)} docs")
        for d in docs:
            print("-----")
            print(d.page_content)

    print("✅ Reconnected to Pinecone vectorstore and retriever is ready")
    return vectordb, retriever



### Set up retriever

In [25]:
# 4. Set up retriever
# reconnect_vectorstore() also prints the index's vector count and a few sample hits.
vectordb, retriever = reconnect_vectorstore()
print("✅ Retriever initialized")

# 5. Memory & Tools
from langchain.memory import ConversationBufferMemory
from langchain.agents import initialize_agent, AgentType, Tool
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# Chat model shared by the agent; uses the OpenAI key loaded in the setup cell.
llm = ChatOpenAI(temperature=0, model="gpt-4", openai_api_key=OPENAI_API_KEY)
print("✅ Initialized LLM")

# Define RetrievalQA chain
# NOTE(review): init_qa_chain comes from the local `utils` module and is called
# without arguments, so it does not receive the `retriever` or `llm` created
# above — presumably it builds its own; confirm against utils.py.
# answer_with_sources reads result["result"] and result["source_documents"]
# from this chain, so the chain must return both keys.
from utils import init_qa_chain
qa_chain = init_qa_chain()

print("✅ RetrievalQA chain created")


📦 Index contains 78354 vectors
Retrieved 5 docs
-----
of time. I want to watch videos about
-----
for those who can see this on video uh
-----
a short video in a series that we do
-----
know made this video that was giving a
-----
term but there is a very popular video
✅ Reconnected to Pinecone vectorstore and retriever is ready
✅ Retriever initialized
✅ Initialized LLM
✅ RetrievalQA chain created


### Define function for answering questions with sources

In [26]:

def answer_with_sources(input_text: str):
    """Answer a question with the QA chain and append the source files used.

    Args:
        input_text: The question to send to the module-level ``qa_chain``.

    Returns:
        The chain's answer, followed by a ``Sources:`` section listing each
        distinct ``source_file`` metadata value once, in retrieval order
        (``"Unknown"`` for documents without that metadata key).
    """
    print(f"\n📝 Query: {input_text}")
    # .invoke replaces the deprecated direct call on the chain object.
    result = qa_chain.invoke({"query": input_text})
    answer = result["result"]
    # dict.fromkeys removes duplicates while keeping first-seen order; a set
    # would shuffle the list between runs.
    sources = list(dict.fromkeys(
        doc.metadata.get("source_file", "Unknown") for doc in result["source_documents"]
    ))
    # The count below is the number of distinct source files, not retrieved chunks.
    print(f"🗒️ Retrieved {len(sources)} source documents")
    return f"{answer}\n\nSources:\n" + "\n".join(sources)

### Set up tools and agent with LangSmith tracing

In [27]:
from langsmith import traceable
import os
# !pip install -U langsmith openai

# Set LangSmith env vars
os.environ["LANGCHAIN_TRACING_V2"] = "true"
# Keep a real key that is already in the environment; only prompt when it is
# missing or still holds the literal placeholder text. Assigning the
# placeholder string here would overwrite a valid key.
_langsmith_key = os.environ.get("LANGCHAIN_API_KEY")
if not _langsmith_key or _langsmith_key == "LANGCHAIN_API_KEY":
    _langsmith_key = getpass("Enter your LangSmith API key: ")
os.environ["LANGCHAIN_API_KEY"] = _langsmith_key
os.environ["LANGCHAIN_PROJECT"] = "youtube-rag"

from langchain.agents import initialize_agent

# Define tool
# The agent is given a single tool: the transcript QA helper defined earlier.
tools = [
    Tool(
        func=answer_with_sources,
        name="YouTubeTranscriptQA",
        description="Useful for answering questions about YouTube video transcripts. Input should be a fully formed question.",
    ),
]
print("✅ Tools defined")

# Conversation memory
# Earlier turns are kept under the "chat_history" key.
memory = ConversationBufferMemory(memory_key="chat_history")
print("✅ Conversation memory initialized")

# 6. Initialize Agent with tools and memory
# max_iterations=3 limits how many steps one run may take.
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,
    memory=memory,
    max_iterations=3,
    verbose=True,
)

print("✅ Agent initialized and ready")

@traceable(name="YouTube RAG Trace")
def run_agent(query="Who was Albert Einstein?"):
    """Run the agent on one query inside a LangSmith trace.

    Args:
        query: The text passed to ``agent.run``.

    Returns:
        Whatever ``agent.run`` returns for the query. Without this return
        the caller would always receive ``None``.
    """
    return agent.run(query)

result = run_agent()

✅ Tools defined
✅ Conversation memory initialized
✅ Agent initialized and ready


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? No
AI: Albert Einstein was a renowned physicist who is best known for his theory of relativity, which revolutionized the field of theoretical physics. He was born in Germany in 1879 and showed an early aptitude for mathematics and physics. His work on the photoelectric effect won him the Nobel Prize in Physics in 1921. Einstein's most famous equation, E=mc^2, demonstrates the equivalence of energy (E) and mass (m), with "c" being the speed of light in a vacuum. This equation has been fundamental to the development of nuclear energy. Einstein emigrated to the United States in the 1930s due to the rise of the Nazi regime in Germany. He accepted a position at the Institute for Advanced Study in Princeton, New Jersey, where he remained until his retirement. He passed away in 1955.[0m

[1m> Finished chain.[0m


### Prompt agent

In [28]:
# Prompt templates keyed by prompt type. Every "{}" is a positional slot filled
# by ask_agent; "compare" takes two topics, every other template takes one.
prompt_templates = {
    "summary": "Please provide a concise summary of the following topic: '{}'",
    "source": "Please provide the video source for the following topic: '{}'",
    "explanation": "Explain in detail: '{}'",
    "compare": "Compare and contrast these two concepts: '{}' and '{}'",
    "timeline": "Give me a timeline of events related to: '{}'",
    "faq": "What are the most frequently asked questions about '{}', and their answers?",
    "key_points": "List the key points covered in: '{}'",
    "step_by_step": "Provide a step-by-step guide on how to: '{}'",
    "examples": "Give me examples related to '{}'",
    "pros_cons": "What are the pros and cons of '{}'",
    "common_mistakes": "What are the common mistakes people make regarding '{}', and how to avoid them?",
}

def ask_agent(agent, prompt_type, *args):
    """Fill in a prompt template and send it to the agent.

    Args:
        agent: Any object with a ``run(prompt)`` method.
        prompt_type: A key of ``prompt_templates``.
        *args: Values for the template's ``{}`` slots, in order. Surplus
            values are ignored by ``str.format``.

    Returns:
        The value returned by ``agent.run`` for the formatted prompt.

    Raises:
        ValueError: If ``prompt_type`` is unknown, or if fewer values are
            supplied than the template has slots.
    """
    if prompt_type not in prompt_templates:
        raise ValueError(f"Prompt type '{prompt_type}' not supported.")
    try:
        prompt = prompt_templates[prompt_type].format(*args)
    except IndexError as exc:
        # str.format reports a missing positional value as a bare IndexError;
        # re-raise it as the same error type used for other bad input.
        raise ValueError(
            f"Prompt type '{prompt_type}' needs more arguments than the {len(args)} supplied."
        ) from exc
    print(f"Prompt to model:\n{prompt}\n")
    response = agent.run(prompt)
    return response



### User query testing

In [29]:
# Two-topic request: fills both slots of the "compare" template.
result = ask_agent(agent, "compare", "string theory", "pinpoint theory")
print(result)

Prompt to model:
Compare and contrast these two concepts: 'string theory' and 'pinpoint theory'



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? No
AI: It seems there might be a bit of confusion in your question. While 'string theory' is a well-known concept in theoretical physics, 'pinpoint theory' is not a recognized term in the field. 

String theory is a theoretical framework in which the point-like particles of particle physics are replaced by one-dimensional objects called strings. It describes how these strings propagate through space and interact with each other. The theory has the potential to provide a unified description of gravity and particle physics, as it incorporates quantum mechanics and general relativity.

On the other hand, 'pinpoint theory' doesn't appear to be a standard term in physics. If you're referring to a specific concept, could you please provide more context or details?[0m

[1m> Finished chain.[0m
It seems

In [30]:
# Single-topic request using the "explanation" template.
result = ask_agent(agent, "explanation", "Black holes")
print(result)

Prompt to model:
Explain in detail: 'Black holes'



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? No
AI: A black hole is a region of spacetime where gravity is so strong that nothing, not even particles or electromagnetic radiation such as light, can escape from it. The theory of general relativity predicts that a sufficiently compact mass can deform spacetime to form a black hole.

Black holes are characterized by the "event horizon," a boundary in spacetime through which matter and light can only pass inward towards the mass of the black hole. Nothing, not even light, can escape from inside the event horizon. The size of the event horizon is proportional to the mass of the black hole.

There are three types of black holes: stellar black holes, supermassive black holes, and intermediate black holes. Stellar black holes are formed when a massive star collapses under its own gravity. Supermassive black holes, which are millions to billions

In [31]:
# Ask which videos cover a topic, using the "source" template.
result = ask_agent(agent, "source", "Black holes")
print(result)

Prompt to model:
Please provide the video source for the following topic: 'Black holes'



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: YouTubeTranscriptQA
Action Input: 'Black holes'[0m
📝 Query: 'Black holes'


  result = qa_chain({"query": input_text})


🗒️ Retrieved 4 source documents

Observation: [36;1m[1;3mI'm sorry, but your question is not clear. Could you please provide more information or context?

Sources:
41 - The Science of Interstellar with Science Advisor, Kip Thorne.en.txt
52 - The Sounds of Space ｜ StarTalk Live! at Guild Hall.en.txt
09 - Answering Questions About Time Travel, Aliens & Negative Gravity.en.txt
17 - When Black Holes Collide, with Nergis Mavalvala.en.txt[0m
Thought:[32;1m[1;3mDo I need to use a tool? No
AI: Here are some video sources related to the topic 'Black holes':

1. "The Science of Interstellar with Science Advisor, Kip Thorne"
2. "The Sounds of Space ｜ StarTalk Live! at Guild Hall"
3. "Answering Questions About Time Travel, Aliens & Negative Gravity"
4. "When Black Holes Collide, with Nergis Mavalvala"

You can search for these titles on YouTube to find more information about black holes.[0m

[1m> Finished chain.[0m
Here are some video sources related to the topic 'Black holes':

1. "The Sc

In [32]:
# Same "source" template, different topic.
result = ask_agent(agent, "source", "string theory")
print(result)

Prompt to model:
Please provide the video source for the following topic: 'string theory'



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: YouTubeTranscriptQA
Action Input: Please provide the video source for the following topic: 'string theory'[0m
📝 Query: Please provide the video source for the following topic: 'string theory'
🗒️ Retrieved 2 source documents

Observation: [36;1m[1;3mI'm sorry, but I can't provide the video source as I don't have the ability to access or search for videos.

Sources:
49 - Answering Fan Questions About Photons, Fire & Gravity Waves.en.txt
01 - Is The Universe Made of Tiny Vibrating Strings？ With Lara Anderson.en.txt[0m
Thought:[32;1m[1;3mDo I need to use a tool? No
AI: Here are some video sources related to the topic 'string theory':

1. "Is The Universe Made of Tiny Vibrating Strings? With Lara Anderson"
2. "Answering Fan Questions About Photons, Fire & Gravity Waves"

You can search for 