In [None]:
# !pip install -q youtube-transcript-api langchain-community langchain-openai langchain-google-genai faiss-cpu tiktoken python-dotenv

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate

In [None]:
# Read key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this supplies the API key(s) used by the
# OpenAI / Google clients created further down — confirm.
from dotenv import load_dotenv
load_dotenv()

True

# Step 1a - Indexing (Document Ingestion)

In [None]:
video_id = 'Gfr50f6ZBvo' # only the ID, not full URL

# Reset the outputs first so a failed fetch can never leave a previous
# video's transcript in the kernel for the later cells to index silently.
transcript_list = []
transcript = ""

try:
    # Explicitly request the English transcript; each entry is a dict with
    # 'text', 'start' and 'duration' keys.
    # NOTE(review): get_transcript is the older static entry point of
    # youtube-transcript-api; newer 1.x releases replace it with
    # YouTubeTranscriptApi().fetch(...) — confirm the installed version.
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id=video_id, languages=['en'])

    # Flatten it to plain text
    transcript = " ".join(chunk['text'] for chunk in transcript_list)
    # Show a short preview rather than flooding the output with the whole transcript.
    print(transcript[:500])

except TranscriptsDisabled:
    print('No captions available for this video.')

the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to interview people until i get good enough 

In [None]:
# Peek at the first few timed segments instead of dumping the entire list.
transcript_list[:5]

[{'text': 'the following is a conversation with',
  'start': 0.08,
  'duration': 3.44},
 {'text': 'demus hasabis', 'start': 1.76, 'duration': 4.96},
 {'text': 'ceo and co-founder of deepmind', 'start': 3.52, 'duration': 5.119},
 {'text': 'a company that has published and builds',
  'start': 6.72,
  'duration': 4.48},
 {'text': 'some of the most incredible artificial',
  'start': 8.639,
  'duration': 4.561},
 {'text': 'intelligence systems in the history of',
  'start': 11.2,
  'duration': 4.8},
 {'text': 'computing including alfred zero that',
  'start': 13.2,
  'duration': 3.68},
 {'text': 'learned', 'start': 16.0, 'duration': 2.96},
 {'text': 'all by itself to play the game of gold',
  'start': 16.88,
  'duration': 4.559},
 {'text': 'better than any human in the world and',
  'start': 18.96,
  'duration': 5.6},
 {'text': 'alpha fold two that solved protein',
  'start': 21.439,
  'duration': 4.241},
 {'text': 'folding', 'start': 24.56, 'duration': 4.16},
 {'text': 'both tasks consider

# Step 1b - Indexing (Text Splitting)

In [None]:
# Cut the transcript into chunks of at most 1000 characters, with a
# 200-character overlap between neighbours, each wrapped in a Document.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
chunks = splitter.create_documents([transcript])

In [None]:
len(chunks)

168

In [None]:
chunks[0]

Document(metadata={}, page_content="the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to inter

# Step 1c, 1d - Indexing (Embedding Generation and Storing in Vector Store)

In [None]:
# Embed every chunk and store the vectors in a FAISS vector store.
# The commented-out line below is the Google embedding alternative.
# embedding_model = GoogleGenerativeAIEmbeddings(model='models/embedding-001')
embedding_model = OpenAIEmbeddings()
vector_store = FAISS.from_documents(documents=chunks, embedding=embedding_model)

In [None]:
vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x7d6b4ad5c0d0>

In [None]:
vector_store.index_to_docstore_id

{0: 'a126f3a6-cd04-4296-aafe-4ef1d9a1cef9',
 1: '01eaf468-f834-4b53-a0ab-7c5708a3e353',
 2: '9ee09f0a-f741-4910-beef-b4535a14319f',
 3: 'b479f885-38c5-4c5a-b4ae-adcf925bbd9c',
 4: 'c3b953c7-471a-428b-9470-0316809656c4',
 5: '43959c63-23c5-43ba-86a4-7facf466d42b',
 6: '2048f5a7-5001-4a3f-858d-96ccf7de520a',
 7: 'c205f73c-cda5-422a-a8eb-b564e0dbf9a8',
 8: 'ed3a7de8-7960-4fbb-92e2-80b8a35d6c36',
 9: '59d11fbb-3417-41d0-bd74-2976a57cce77',
 10: 'bd867c7d-cca9-47e5-acca-46475de6b295',
 11: '45f4adc0-7164-4ca0-a269-b6c38bc89f80',
 12: 'fc5e5706-faf1-412a-a726-606ec7699759',
 13: 'b3690191-27d1-4e56-ac9b-a8ee32492927',
 14: '8816a225-8073-4336-8b0a-13e75152d3d6',
 15: '06d25b0f-0740-4e43-b25e-db3b1f96f9eb',
 16: '5f3be473-9b74-4ca3-8861-ee112ed6c9a4',
 17: '33625d77-0298-4080-b4cf-f6d6d46d210d',
 18: '801e826a-df99-45e4-a039-75cced1b64d9',
 19: '86db1b76-7401-4792-ad03-1eb7bde78f6e',
 20: 'c7e1d857-c6e2-408a-8ed1-c9720f93b663',
 21: '34394d06-c485-4d3c-9696-70532e5f5cb2',
 22: 'a089bf9d-8325-

In [None]:
# Document ids are generated anew every time the store is built, so read the
# first id from the index mapping instead of hard-coding one from an earlier run.
first_doc_id = vector_store.index_to_docstore_id[0]
vector_store.get_by_ids([first_doc_id])

[Document(id='a126f3a6-cd04-4296-aafe-4ef1d9a1cef9', metadata={}, page_content="the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal qu

# Step 2 - Retrieval

In [None]:
# Expose the vector store as a retriever that returns the 4 most similar chunks.
retriever = vector_store.as_retriever(
    search_kwargs={'k': 4},
    search_type='similarity',
)

In [None]:
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7d6b4ad5c0d0>, search_kwargs={'k': 4})

In [None]:
retriever.invoke('What is deepmind')

[Document(id='a126f3a6-cd04-4296-aafe-4ef1d9a1cef9', metadata={}, page_content="the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal qu

# Step 3 - Augmentation

In [None]:
# Chat model used for answer generation; the commented-out line is the Gemini
# alternative. Both variants are configured with a low temperature (0.2).
# llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash', temperature=0.2)
llm = ChatOpenAI(model='gpt-4.1-nano', temperature=0.2)

In [None]:
# Grounded-answer prompt: the model is told to answer only from the retrieved
# transcript text ({context}) and to admit when that text is insufficient.
prompt = PromptTemplate(
    template="""
    You are a helpful assistant.
    Answer ONLY from the provided transcript context.
    If the context is insufficient, just say you don't know.

    {context}
    Question: {question}
    """,
    input_variables=['context', 'question']
)

In [None]:
# Example query; the retriever returns the stored chunks most similar to it.
question = "is the topic of aliens discussed in this video? if yes what was discussed"
retrieved_docs = retriever.invoke(question)

In [None]:
retrieved_docs

[Document(id='8eaac6f9-5de3-4ef4-9dd9-e18b56f98ec6', metadata={}, page_content="potentially the hundreds there should be would be uniform in this way right it could be a violent dictatorship that the the people the alien civilizations that uh become successful become um [Music] gain the ability to be destructive an order of magnitude more destructive but of course the the sad thought well either humans are very special we took a lot of leaps that arrived at what it means to be human yeah um there's a question there which was the hardest which was the most special but also if others have reached this level and maybe many others have reached this level the great filter that prevented them from going farther to becoming a multi-planetary species or reaching out into the stars and those are really important questions for us whether um whether there's other alien civilizations out there or not this is very useful for us to think about if we destroy ourselves how will we do it and how easy i

In [None]:
# Merge the retrieved chunks into one context string, a blank line between chunks.
page_texts = [doc.page_content for doc in retrieved_docs]
context_text = "\n\n".join(page_texts)

In [None]:
# Fill the template's {context} and {question} slots to produce the final prompt value.
final_prompt = prompt.invoke({'context': context_text, 'question': question})

In [None]:
final_prompt

StringPromptValue(text="\n    You are a helpful assistant.\n    Answer ONLY from the provided transcript context.\n    If the contet is insufficient, just say you don't know.\n\n    potentially the hundreds there should be would be uniform in this way right it could be a violent dictatorship that the the people the alien civilizations that uh become successful become um [Music] gain the ability to be destructive an order of magnitude more destructive but of course the the sad thought well either humans are very special we took a lot of leaps that arrived at what it means to be human yeah um there's a question there which was the hardest which was the most special but also if others have reached this level and maybe many others have reached this level the great filter that prevented them from going farther to becoming a multi-planetary species or reaching out into the stars and those are really important questions for us whether um whether there's other alien civilizations out there or 

# Step 4 - Generation

In [None]:
# Send the filled-in prompt to the chat model and print only the text of its reply.
answer = llm.invoke(final_prompt)
print(answer.content)

Yes, the topic of aliens is discussed in this video. It was mentioned that alien civilizations could be more primitive or more advanced than humans, with a range of behaviors from aggressive to curious or philosophical. There is also a discussion about the possibility of other alien civilizations reaching similar levels of development, and the idea that the distribution of their characteristics might not be uniform. Additionally, it was suggested that some alien civilizations might communicate thoughts or use other methods, but there is no sensible argument for why all would do so in the same way. The conversation also touched on the idea that successful alien civilizations could be violent dictatorships.


# Building a Chain

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [None]:
def format_docs(retrieved_docs):
    """Concatenate the ``page_content`` of each document into a single string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The page contents in their original order, separated by a blank line
        (``"\\n\\n"``); an empty string when no documents are given.
    """
    page_texts = [doc.page_content for doc in retrieved_docs]
    return "\n\n".join(page_texts)

In [None]:
# Fan the incoming question out into the two inputs the prompt needs:
#   'context'  -> retrieve matching chunks and join them into one string
#   'question' -> the question itself, passed through unchanged
context_branch = retriever | RunnableLambda(format_docs)
question_branch = RunnablePassthrough()
parallel_chain = RunnableParallel({
    'context': context_branch,
    'question': question_branch,
})

In [None]:
parallel_chain.invoke("Who is Demis")

{'context': "the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to interview people until i get

In [None]:
parser = StrOutputParser()

In [None]:
# End-to-end pipeline: build {context, question} -> fill the prompt ->
# call the chat model -> parse the reply into a plain string.
main_chain = parallel_chain | prompt | llm | parser

In [None]:
# NOTE: the retriever supplies only the top k=4 chunks to the prompt, so this
# "summary" reflects those chunks rather than the whole transcript.
main_chain.invoke("Can you summarize the video")

'The video features a conversation with Demis Hassabis, CEO and co-founder of DeepMind, discussing advancements in artificial intelligence, including systems like AlphaFold2 and AlphaZero. They explore themes such as the nature of intelligence, the importance of explaining complex ideas simply, and the potential for enhancing human cognition through technology. The discussion also touches on philosophical questions about AI and human understanding, emphasizing the significance of clear communication and the ongoing quest to solve fundamental mysteries in science and consciousness.'

# Improvements

1. UI based enhancements

2. Evaluation
    a. Ragas - faithfulness, answer_relevancy, context_precision, context_recall
    b. LangSmith

3. Indexing
    a. Document Ingestion
    b. Text Splitting
    c. Vector Store

4. Retrieval  
    a. Pre-Retrieval
        Query rewriting using LLM
        Multi-query generation
        Domain aware routing

    b. During Retrieval
        MMR
        Hybrid Retrieval
        Reranking
    
    c. Post-Retrieval
        Contextual Compression
    
5. Augmentation
    a. Prompt Templating
    b. Answer grounding
    c. Context window optimization

6. Generation
    a. Answer with Citation
    b. Guardrails

7. System Design
    a. Multimodal
    b. Agentic
    c. Memory Based