In [1]:


# Set OpenAI API Key
from google.colab import userdata
import os

# Get your key securely (no need to type it)
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

# ✅ Optional: check if it’s loaded
print("✅ OpenAI API key loaded successfully!" if os.environ.get("OPENAI_API_KEY") else "❌ Key not found")
print("✓ OpenAI API configured successfully!")
print(f"✓ Using model: gpt-4o-mini")

✅ OpenAI API key loaded successfully!
✓ OpenAI API configured successfully!
✓ Using model: gpt-4o-mini


In [5]:
!pip install numpy pandas pydantic scikit-learn xgboost imbalanced-learn langchain openai tqdm langchain_openai youtube-transcript-api langchain-community faiss-cpu tiktoken python-dotenv



In [6]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
# !pip install youtube-transcript-api --upgrade

# Step 1a - Indexing (Document Ingestion)




In [17]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
import pandas as pd

VIDEO_URL = "https://www.youtube.com/watch?v=Gfr50f6ZBvo"

def get_video_id(url: str) -> str:
    if "v=" in url:
        return url.split("v=")[1].split("&")[0]
    elif "youtu.be/" in url:
        return url.split("youtu.be/")[1].split("?")[0]
    else:
        raise ValueError("Invalid YouTube URL format.")

video_id = get_video_id(VIDEO_URL)

try:
    yt_api = YouTubeTranscriptApi()
    # Try fetching in English first, fallback to Hindi (auto-generated)
    transcript_snippets = yt_api.fetch(video_id, languages=["en", "hi"])

    # Convert snippet objects to text
    texts = [snippet.text.strip() for snippet in transcript_snippets if snippet.text.strip()]
    full_transcript = " ".join(texts)

except TranscriptsDisabled:
    print("❌ Captions are disabled for this video.")
    full_transcript = ""
except Exception as e:
    print(f"⚠️ Could not fetch transcript: {e}")
    full_transcript = ""

# ===========================
# PREVIEW
# ===========================
if full_transcript:
    df = pd.DataFrame([{"video_id": video_id, "transcript": full_transcript}])

print("\n--- TRANSCRIPT PREVIEW ---")
print(full_transcript[:1000] + "..." if full_transcript else "No transcript available.")

transcript = full_transcript



--- TRANSCRIPT PREVIEW ---
the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to interview peo

In [16]:
transcript_snippets.to_raw_data()

[{'text': 'the following is a conversation with',
  'start': 0.08,
  'duration': 3.44},
 {'text': 'demus hasabis', 'start': 1.76, 'duration': 4.96},
 {'text': 'ceo and co-founder of deepmind', 'start': 3.52, 'duration': 5.119},
 {'text': 'a company that has published and builds',
  'start': 6.72,
  'duration': 4.48},
 {'text': 'some of the most incredible artificial',
  'start': 8.639,
  'duration': 4.561},
 {'text': 'intelligence systems in the history of',
  'start': 11.2,
  'duration': 4.8},
 {'text': 'computing including alfred zero that',
  'start': 13.2,
  'duration': 3.68},
 {'text': 'learned', 'start': 16.0, 'duration': 2.96},
 {'text': 'all by itself to play the game of gold',
  'start': 16.88,
  'duration': 4.559},
 {'text': 'better than any human in the world and',
  'start': 18.96,
  'duration': 5.6},
 {'text': 'alpha fold two that solved protein',
  'start': 21.439,
  'duration': 4.241},
 {'text': 'folding', 'start': 24.56, 'duration': 4.16},
 {'text': 'both tasks consider

# Step 1b - Indexing (Text Splitting)

In [18]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.create_documents([transcript])


# Step 1c & 1d - Indexing (Embedding Generation and Storing in Vector Store)

In [20]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vector_store = FAISS.from_documents(chunks, embeddings)

In [22]:
vector_store.index_to_docstore_id

{0: 'ebd35c27-6277-4aa8-a7f0-ff6c18c6307e',
 1: '1d6c5f50-cd89-45b5-9e89-ab6db5e5e47e',
 2: '0e0c5539-0367-4fd6-bf6f-10819062051f',
 3: '1c957d0b-160a-435b-bee3-7ede91d109cd',
 4: 'b0b31d2d-cc09-4826-b184-c29a89d47c2a',
 5: '2a51ae51-0732-4f0e-9d82-a7a599dba8d5',
 6: 'ad525777-9c91-40d1-a691-69a17195108a',
 7: '773e0290-2f6c-4e9f-a54b-63c10e140702',
 8: '5a2fbd69-0972-43ba-b62c-f21520e98502',
 9: 'a763e2b3-ff8e-4576-91a7-3a1ec2efd721',
 10: 'a868697f-a861-4717-a423-9b07830ce261',
 11: 'b10a0dfa-4c8c-4f18-a4fa-7fc3ebdc5ca0',
 12: '2dd6e7fd-1aed-43dd-a594-6ddd46336f12',
 13: 'a97cdc08-d917-4f73-96db-083fe5f0345f',
 14: '99ed4a22-03b4-463f-ba91-9e168fd49088',
 15: '6366ef8c-11e7-4dfa-ae94-f3481f4e416f',
 16: 'f66bebe8-7408-4329-b042-0aebea35b6d0',
 17: '5f1a298e-aac7-4136-a4bf-1f8724099aa9',
 18: '65b51d0c-4b52-4fe7-92ca-80df1b57f39d',
 19: '5e99f97a-ecb3-4010-bc9a-c90d600256fe',
 20: '4045a3e5-50e7-4305-a7dc-f661231a8f95',
 21: '38c4776f-2b78-47c2-bd21-72846cae6228',
 22: '37800099-09e8-

In [24]:
vector_store.get_by_ids(['3b1f9ef2-304a-4bda-b24f-861c79e7a823'])

[Document(id='3b1f9ef2-304a-4bda-b24f-861c79e7a823', metadata={}, page_content="the music yourself so i think we see this in all forms of life so it'll be that times you know a million but it would you can imagine also one sign of intelligence is the ability to explain things clearly and simply right you know people like richard feynman another one of my all-time heroes used to say that right if you can't you know if you can explain it something simply then you that's a that's the best sign a complex topic simply then that's one of the best signs of you understanding it yeah so i can see myself talking trash in the ai system in that way yes uh it gets frustrated how dumb i am and trying to explain something to me i was like well that means you're not intelligent because if you were intelligent you'd be able to explain it simply yeah of course you know there's also the other option of course we could enhance ourselves and and without devices we we are already sort of symbiotic with our 

# Step 2 - Retrieval

In [25]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

In [26]:
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x78080f9d5d90>, search_kwargs={'k': 4})

In [31]:
retriever.invoke("Who is demus hasabis")

[Document(id='ebd35c27-6277-4aa8-a7f0-ff6c18c6307e', metadata={}, page_content="the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal qu

# Step 3 - Augmentation

In [34]:
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2)

In [36]:
prompt = PromptTemplate(
    template="""
    You are an helpful assitance.
    answer only from provided transcript context.
    if the content is insufficient just say i dont know

    {context}
    Question:{question}
    """,
    input_variables=["context", "question"]
)

In [53]:
question = "what about alien"
retrieved_docs = retriever.invoke(question)

In [54]:
for doc in retrieved_docs:
  print(doc.page_content)

thoughts it could be some interactions with our mind that we think are originating from us is actually something that uh is coming from other life forms elsewhere consciousness itself might be that it could be but i don't see any sensible argument to the why why would all of the alien species be using this way yes some of them will be more primitive they would be close to our level you know there would there should be a whole sort of normal distribution of these things right some would be aggressive some would be you know curious others would be very stoical and philosophical because you know maybe they're a million years older than us but it's not it shouldn't be like what i mean one one alien civilization might be like that communicating thoughts and others but i don't see why you know potentially the hundreds there should be would be uniform in this way right it could be a violent dictatorship that the the people the alien civilizations that uh become successful become um [Music]
ot

In [55]:
context_text = "\n\n".join(docs.page_content for docs in retrieved_docs)
context_text


"thoughts it could be some interactions with our mind that we think are originating from us is actually something that uh is coming from other life forms elsewhere consciousness itself might be that it could be but i don't see any sensible argument to the why why would all of the alien species be using this way yes some of them will be more primitive they would be close to our level you know there would there should be a whole sort of normal distribution of these things right some would be aggressive some would be you know curious others would be very stoical and philosophical because you know maybe they're a million years older than us but it's not it shouldn't be like what i mean one one alien civilization might be like that communicating thoughts and others but i don't see why you know potentially the hundreds there should be would be uniform in this way right it could be a violent dictatorship that the the people the alien civilizations that uh become successful become um [Music]\n

In [56]:
final_prompt = prompt.invoke({"context": context_text, "question": question})

In [57]:
final_prompt

StringPromptValue(text="\n    You are an helpful assitance.\n    answer only from provided transcript context.\n    if the content is insufficient just say i dont know\n\n    thoughts it could be some interactions with our mind that we think are originating from us is actually something that uh is coming from other life forms elsewhere consciousness itself might be that it could be but i don't see any sensible argument to the why why would all of the alien species be using this way yes some of them will be more primitive they would be close to our level you know there would there should be a whole sort of normal distribution of these things right some would be aggressive some would be you know curious others would be very stoical and philosophical because you know maybe they're a million years older than us but it's not it shouldn't be like what i mean one one alien civilization might be like that communicating thoughts and others but i don't see why you know potentially the hundreds t

# Step 4 - Generation

In [58]:
answer = llm.invoke(final_prompt)

In [59]:
print(answer.content)

The discussion suggests that there may be many alien civilizations, but the speaker personally believes that we are likely alone based on the evidence available. They argue that we should have heard signs of other civilizations by now, and the lack of evidence leads them to think that if aliens were everywhere, we would have noticed. They also mention the possibility of different types of alien civilizations, ranging from primitive to advanced, but question why they would all communicate in a similar way. The speaker reflects on the idea that if other civilizations exist, they may face challenges that prevent them from becoming multi-planetary or reaching out into the stars.


# Building a **Chain**

In [64]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough,RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [70]:
def format_docs(retrieved_docs):
  return "\n\n".join(docs.page_content for docs in retrieved_docs)

In [74]:
paralle_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough()
)

In [75]:
paralle_chain.invoke("who is demis")

{'context': "the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to interview people until i get

In [76]:
parser = StrOutputParser()

In [77]:
main_chain = paralle_chain | prompt | llm | parser

In [81]:
main_chain.invoke("about alien? and give me the answer in a list format")

'1. The speaker believes that we are likely alone in the universe, based on current evidence.\n2. They think that if there were many alien civilizations, we should have heard a "cacophony of voices" by now.\n3. The speaker argues that the search for extraterrestrial life has been extensive enough to expect some evidence if it existed.\n4. They mention that alien civilizations could vary greatly, with some being primitive and others potentially more advanced or aggressive.\n5. The idea of a "great filter" is introduced, suggesting that there may be barriers preventing civilizations from advancing to a multi-planetary stage or reaching out into the stars.\n6. The speaker emphasizes the importance of considering these questions, especially regarding humanity\'s potential self-destruction.'