In [12]:
# Necessary imports
import os
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from langchain_classic.retrievers.document_compressors import LLMChainExtractor
from langchain_classic.retrievers import MultiQueryRetriever, ContextualCompressionRetriever
from langchain_pinecone import PineconeVectorStore
from dotenv import load_dotenv

# Load huggingface api key
load_dotenv()

True

In [13]:
embedding_model = HuggingFaceEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")
llm = ChatGroq(model="llama-3.1-8b-instant")
base_compressor = LLMChainExtractor.from_llm(llm=llm)
model = ChatGoogleGenerativeAI(model='gemini-2.5-flash-lite')

Both GOOGLE_API_KEY and GEMINI_API_KEY are set. Using GOOGLE_API_KEY.


## 1. Indexing

In [14]:
# =============================== 1. Document Ingestion ===============================
video_id = "Gfr50f6ZBvo"

try:
    api = YouTubeTranscriptApi()
    transcript_list = api.list(video_id=video_id).find_transcript(["en", "hi"]).fetch()
    transcript = " ".join(chunk.text for chunk in transcript_list) # video transcript 

except TranscriptsDisabled:
    print(f"Transcript for the {video_id} is not found !")
except Exception as e:
    print(f"An Error Occured: {type(e)}")

#  =============================== 2. Text Splitting/Document Chunking ===============================
splitter = RecursiveCharacterTextSplitter(
    chunk_size=900,
    chunk_overlap=20
)
chunks = splitter.create_documents(texts=[transcript]) # smaller chunks of the large transcript document

print(f"========== Total chunks: {len(chunks)} ==========")
for i, chunk in enumerate(chunks[:3], start=1):
    print(f"\n========== Chunk {i}:==========\n")
    print(chunk.page_content)

# =============================== 3. Storing into a Vector Store ===============================
vector_store = PineconeVectorStore.from_documents(
    documents=chunks,
    embedding=embedding_model,
    index_name=os.getenv("PINECONE_INDEX_NAME")
)
print("\n=============================== Pinecone Vector Store Created Successfully ! ===============================\n" )



the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a


let's start with a bit of a personal question am i an ai program you wrote to interview people u

## 2. Retrieval

In [15]:
mmr_retriever = vector_store.as_retriever(
    search_type="mmr",
    kwargs={
        "k": 5,
        "fetch_k": 25,
        "lambda_mult": 0.6
    }
)

ss_retriever = vector_store.as_retriever(
    search_type="similarity",
    kwargs={"k":4}

)

multi_query_retriever = MultiQueryRetriever.from_llm(
    retriever=ss_retriever,
    llm=llm      
)

contextual_compression_retriever = ContextualCompressionRetriever(
    base_compressor=base_compressor,
    base_retriever=multi_query_retriever
)

In [16]:
query = "what were the main topics disscussed  ?"
context = contextual_compression_retriever.invoke(query)
context

[Document(metadata={}, page_content='demas establish to support this podcast please check out our sponsors in the description and now let me leave you with some words from edskar dykstra computer science is no more about computers than astronomy is about telescopes'),
 Document(metadata={}, page_content='>>\ndemas establish to support this podcast please check out our sponsors in the description and now let me leave you with some words from edskar dykstra computer science is no more about computers than astronomy is about telescopes thank you for listening and hope to see you next time\n>>'),
 Document(metadata={}, page_content='computer science is no more about computers than astronomy is about telescopes'),
 Document(metadata={}, page_content='demas establish to support this podcast please check out our sponsors in the description \nnow let me leave you with some words from edskar dykstra \ncomputer science is no more about computers than astronomy is about telescopes'),
 Document(me

In [17]:
context_text = " ".join(doc.page_content for doc in context)
context_text

"demas establish to support this podcast please check out our sponsors in the description and now let me leave you with some words from edskar dykstra computer science is no more about computers than astronomy is about telescopes >>\ndemas establish to support this podcast please check out our sponsors in the description and now let me leave you with some words from edskar dykstra computer science is no more about computers than astronomy is about telescopes thank you for listening and hope to see you next time\n>> computer science is no more about computers than astronomy is about telescopes demas establish to support this podcast please check out our sponsors in the description \nnow let me leave you with some words from edskar dykstra \ncomputer science is no more about computers than astronomy is about telescopes call them alpha x projects right is and the easiest way to see that is the evolution of alphago to alpha zero so \nalphago was um a learning system but it was specifically

## 3. Augmentation

In [18]:
prompt = PromptTemplate(
    template="""
Answer the following question only usind the provided context, If the context not enough just return "Context is not enough to provide accurate Answer !"
context: {context_text}
question: {query}
give answer with proper format and give citations
""",
input_variables=["context", "query"]
)

## 4. Generation

In [19]:
parser = StrOutputParser()

chain = prompt | model | parser

final_result = chain.invoke({"context_text": context_text, "query": query})
final_result

'The main topics discussed in the context are:\n\n*   **Evolution of AlphaGo to AlphaZero and MuZero:** The discussion covers the progression of AlphaGo, starting as a system trained to play Go, to AlphaZero, which could play any two-player game from scratch, and finally MuZero, which could learn the rules of a game itself. This evolution highlights advancements in learning systems and removing the need for human knowledge and predefined rules [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].\n\n*   **Founding of DeepMind and Early AI Debates:** The context touches upon the early days of DeepMind, the challenges in securing funding in 2010, and debates with figures from "old-fashioned AI" like Minsky and Patrick Winston, who doubted the potential of learning systems [11, 12, 13, 14].\n\n*   **Day in the Life and Habits of Researchers:** There\'s a segment that asks about daily routines, including wake-up times, habits, coffee consumption, computer setups, and preferred text editors (Emacs/Vim), and how

In [20]:
# Questions 
# demus hasabis
# what is deepmind
# what were the main topics disscussed 