In [3]:
# Necessary imports
import os
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from langchain_classic.retrievers.document_compressors import LLMChainExtractor
from langchain_classic.retrievers import MultiQueryRetriever, ContextualCompressionRetriever
from langchain_pinecone import PineconeVectorStore
from dotenv import load_dotenv

# Load huggingface api key
load_dotenv()

True

In [4]:
embedding_model = HuggingFaceEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")
llm = ChatGroq(model="llama-3.1-8b-instant")
base_compressor = LLMChainExtractor.from_llm(llm=llm)
model = ChatGoogleGenerativeAI(model='gemini-2.5-flash-lite')

Both GOOGLE_API_KEY and GEMINI_API_KEY are set. Using GOOGLE_API_KEY.


## 1. Indexing

In [5]:
# =============================== 1. Document Ingestion ===============================
video_id = "Gfr50f6ZBvo"

try:
    api = YouTubeTranscriptApi()
    transcript_list = api.list(video_id=video_id).find_transcript(["en", "hi"]).fetch()
    transcript = " ".join(chunk.text for chunk in transcript_list) # video transcript 

except TranscriptsDisabled:
    print(f"Transcript for the {video_id} is not found !")
except Exception as e:
    print(f"An Error Occured: {type(e)}")

#  =============================== 2. Text Splitting/Document Chunking ===============================
splitter = RecursiveCharacterTextSplitter(
    chunk_size=900,
    chunk_overlap=20
)
chunks = splitter.create_documents(texts=[transcript]) # smaller chunks of the large transcript document

print(f"========== Total chunks: {len(chunks)} ==========")
for i, chunk in enumerate(chunks[:3], start=1):
    print(f"\n========== Chunk {i}:==========\n")
    print(chunk.page_content)

# =============================== 3. Storing into a Vector Store ===============================
vector_store = PineconeVectorStore.from_documents(
    documents=chunks,
    embedding=embedding_model,
    index_name=os.getenv("PINECONE_INDEX_NAME")
)
print("\n=============================== Pinecone Vector Store Created Successfully ! ===============================\n" )



the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a


let's start with a bit of a personal question am i an ai program you wrote to interview people u

## 2. Retrieval

In [6]:
mmr_retriever = vector_store.as_retriever(
    search_type="mmr",
    kwargs={
        "k": 5,
        "fetch_k": 25,
        "lambda_mult": 0.6
    }
)

ss_retriever = vector_store.as_retriever(
    search_type="similarity",
    kwargs={"k":4}

)

multi_query_retriever = MultiQueryRetriever.from_llm(
    retriever=ss_retriever,
    llm=llm      
)

contextual_compression_retriever = ContextualCompressionRetriever(
    base_compressor=base_compressor,
    base_retriever=multi_query_retriever
)

In [7]:
query = "what were the main topics disscussed  ?"
context = contextual_compression_retriever.invoke(query)
context

[Document(metadata={}, page_content='demas establish to support this podcast please check out our sponsors in the description and now let me leave you with some words from edskar dykstra computer science is no more about computers than astronomy is about telescopes thank you for listening and hope to see you next time'),
 Document(metadata={}, page_content='>>\ndemas establish to support this podcast please check out our sponsors in the description and now let me leave you with some words from edskar dykstra computer science is no more about computers than astronomy is about telescopes thank you for listening and hope to see you next time'),
 Document(metadata={}, page_content='demas establish to support this podcast please check out our sponsors in the description \nand now let me leave you with some words from edskar dykstra computer science is no more about computers than astronomy is about telescopes \nthank you for listening'),
 Document(metadata={}, page_content='>>\ndemas establ

In [8]:
context_text = " ".join(doc.page_content for doc in context)
context_text

"demas establish to support this podcast please check out our sponsors in the description and now let me leave you with some words from edskar dykstra computer science is no more about computers than astronomy is about telescopes thank you for listening and hope to see you next time >>\ndemas establish to support this podcast please check out our sponsors in the description and now let me leave you with some words from edskar dykstra computer science is no more about computers than astronomy is about telescopes thank you for listening and hope to see you next time demas establish to support this podcast please check out our sponsors in the description \nand now let me leave you with some words from edskar dykstra computer science is no more about computers than astronomy is about telescopes \nthank you for listening >>\ndemas establish to support this podcast please check out our sponsors in the description and now let me leave you with some words from edskar dykstra computer science i

## 3. Augmentation

In [9]:
prompt = PromptTemplate(
    template="""
Answer the following question only usind the provided context, If the context not enough just return "Context is not enough to provide accurate Answer !"
context: {context_text}
question: {query}
give answer with proper format and give citations
""",
input_variables=["context", "query"]
)

## 4. Generation

In [10]:
parser = StrOutputParser()

chain = prompt | model | parser

final_result = chain.invoke({"context_text": context_text, "query": query})
final_result

'The main topics discussed were:\n\n*   **Founding tenets of DeepMind:** This included algorithmic advances like deep learning, reinforcement learning, advancements in understanding the human brain (with FMRI), the importance of compute power and GPUs, and mathematical/theoretical definitions of intelligence.\n*   **Limitations of current physics and the need for fundamental explanations:** There was a discussion about the "standard model of physics which we know doesn\'t work but we still keep adding to" and the potential for more fundamental explanations of physics to encompass mysteries like consciousness, life, and gravity. This also touched upon cognitive limitations and how non-human systems might achieve deeper understanding.\n\nCitations:\n*   "i used to discuss um uh uh what were the sort of founding tenets of deep mind and it was very various things one was um algorithmic advances so deep learning you know jeff hinton and cohen just had just sort of invented that in academia 

In [11]:
# Questions 
# demus hasabis
# what is deepmind
# what were the main topics disscussed 