In [1]:
!pip uninstall -y google-ai-generativelanguage google-generativeai
!pip install \
  youtube-transcript-api \
  langchain-community \
  langchain-openai \
  langchain-google-genai \
  faiss-cpu \
  tiktoken \
  python-dotenv \
  google-ai-generativelanguage==0.6.15


Found existing installation: google-ai-generativelanguage 0.6.15
Uninstalling google-ai-generativelanguage-0.6.15:
  Successfully uninstalled google-ai-generativelanguage-0.6.15
Found existing installation: google-generativeai 0.8.4
Uninstalling google-generativeai-0.8.4:
  Successfully uninstalled google-generativeai-0.8.4
Collecting youtube-transcript-api
  Downloading youtube_transcript_api-1.1.0-py3-none-any.whl.metadata (23 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.26-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.3.24-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.5-py3-none-any.whl.metadata (5.2 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting google-ai-gen

In [None]:
import os
import google.generativeai as genai

os.environ["GOOGLE_API_KEY"] = "ADD you own API key here."
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])


In [3]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAIEmbeddings 


## Step 1: load transcript using yt api

In [None]:
video_id = "Gfr50f6ZBvo" # only the ID, not full URL
try:
    transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])

    transcript = " ".join(chunk["text"] for chunk in transcript_list)
    print(transcript)

except TranscriptsDisabled:
    print("No captions available for this video.")

the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to interview people until i get good enough 

In [5]:
transcript_list[2]

{'text': 'ceo and co-founder of deepmind', 'start': 3.52, 'duration': 5.119}

## Step 2: Now that we've loaded the data, we have to split in into small chunks

In [6]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.create_documents([transcript])

In [7]:
len(chunks)

168

In [8]:
chunks[0]

Document(metadata={}, page_content="the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to inter

## Step 3: we will creat embedding out of those small chunks and then stored them in the vector database

In [9]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS

embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    dimensions=25
)
vector_store = FAISS.from_documents(chunks,embeddings)


In [10]:
vector_store.index_to_docstore_id

{0: '0f1cb1b2-c7ea-43cd-a2e5-635c192c6b6b',
 1: '8adf2cba-4b41-4bed-81c7-28afa55d17e6',
 2: 'b06a9fc4-67aa-46b6-96fe-52dacf341e19',
 3: '5c8c3c9b-5d41-43f6-91a3-9b6e5020cdf4',
 4: 'cb30885c-4509-4fdd-a9f2-23da3fc56e01',
 5: '6393dff7-9bf5-4c2e-91d5-bd93f7b3e861',
 6: 'b596d818-6500-411a-8a1b-4f6e28cd76ad',
 7: '72f3dabf-0006-4ff1-8470-d7b2480a1378',
 8: '33ab239c-5601-4f00-bb8b-6c938ab10268',
 9: 'ab1f8c77-017a-4046-a463-1f00e9ac0e5f',
 10: '69572fe9-6505-418a-a472-3c7964914e24',
 11: 'de55fcde-9012-4bd4-8ed1-9edc05de262c',
 12: '79080f92-1ea5-4bf0-a637-827bb1ad5645',
 13: '4a7a62ab-812b-4306-a56a-e1f79c960cbe',
 14: '3d593a20-b4f7-48a0-b366-949b43126df6',
 15: '1f3b52ff-1c1f-463f-8af8-4359a65ce28c',
 16: '01dea470-a225-4d5e-8e03-286a024cf5eb',
 17: 'bf10a78f-59a0-4d57-970c-a5822fadddbb',
 18: '52f764c9-5b6e-4fb0-8ce5-73f3380dba78',
 19: '4a0ee824-00fb-458d-9307-d67fc2fdd96a',
 20: '148c7557-5518-4576-876b-3739abbe520a',
 21: 'b80c9ed0-dec8-4625-9a87-c11918c825e3',
 22: '0f5ac3c8-5670-

In [None]:

vector_store.get_by_ids(['451a234c-0e84-41bd-99f1-add7db80b5ce'])

[]

## step 4: Retrieval 

In [12]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 2})

In [None]:
retriever.invoke("what is deepmind")

[Document(id='de744db0-05b4-4dc8-9129-2515dba520b5', metadata={}, page_content="that are amazingly smart at certain things like maybe playing go and chess and other things but they don't feel at all in any shape or form conscious in the way that you know you do to me or i do to you and um and i think actually building ai is uh these intelligent constructs uh is one of the best ways to explore the mystery of consciousness to break it down because um we're going to have devices that are pretty smart at certain things or capable of certain things but potentially won't have any semblance of self-awareness or other things and in fact i would advocate if there's a choice building systems in the first place ai systems that are not conscious to begin with uh are just tools um until we understand them better and the capabilities better so on that topic just not as the ceo of deep mind just as a human being let me ask you about this one particular anecdotal evidence of the google engineer who ma

## step 5: making prompt or the augmentation step here we'll merge the uerser query and the relivent chunks that we got from the retrival.

In [14]:
!pip install langchain-google-genai



In [15]:
# we will from an llm 
from langchain_google_genai import ChatGoogleGenerativeAI
#from langchain_google_genai import GoogleGenerativeAI


llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash-002",  # Use a model that is available
    openai_api_key=os.getenv("GOOGLE_API_KEY"),
    temperature=0.2,
    #max_completion_tokens=20
)


In [16]:
# merge the relivent chunks+ query = prompt
prompt = PromptTemplate(
    template="""
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know. 

      {context}
      Question: {question}
    """,
    input_variables = ['context', 'question']
)


In [17]:
question          = "is the topic of aliens discussed in this video? if yes then what was discussed write it point by point"
retrieved_docs    = retriever.invoke(question)

In [18]:
retrieved_docs

[Document(id='4fc1122a-8363-4a4f-b5d8-7cbf1f804f87', metadata={}, page_content="thoughts it could be some interactions with our mind that we think are originating from us is actually something that uh is coming from other life forms elsewhere consciousness itself might be that it could be but i don't see any sensible argument to the why why would all of the alien species be using this way yes some of them will be more primitive they would be close to our level you know there would there should be a whole sort of normal distribution of these things right some would be aggressive some would be you know curious others would be very stoical and philosophical because you know maybe they're a million years older than us but it's not it shouldn't be like what i mean one one alien civilization might be like that communicating thoughts and others but i don't see why you know potentially the hundreds there should be would be uniform in this way right it could be a violent dictatorship that the t

In [19]:
context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
context_text

"thoughts it could be some interactions with our mind that we think are originating from us is actually something that uh is coming from other life forms elsewhere consciousness itself might be that it could be but i don't see any sensible argument to the why why would all of the alien species be using this way yes some of them will be more primitive they would be close to our level you know there would there should be a whole sort of normal distribution of these things right some would be aggressive some would be you know curious others would be very stoical and philosophical because you know maybe they're a million years older than us but it's not it shouldn't be like what i mean one one alien civilization might be like that communicating thoughts and others but i don't see why you know potentially the hundreds there should be would be uniform in this way right it could be a violent dictatorship that the the people the alien civilizations that uh become successful become um [Music]\n

In [20]:
final_prompt = prompt.invoke({"context": context_text, "question": question})

In [21]:
final_prompt

StringPromptValue(text="\n      You are a helpful assistant.\n      Answer ONLY from the provided transcript context.\n      If the context is insufficient, just say you don't know. \n\n      thoughts it could be some interactions with our mind that we think are originating from us is actually something that uh is coming from other life forms elsewhere consciousness itself might be that it could be but i don't see any sensible argument to the why why would all of the alien species be using this way yes some of them will be more primitive they would be close to our level you know there would there should be a whole sort of normal distribution of these things right some would be aggressive some would be you know curious others would be very stoical and philosophical because you know maybe they're a million years older than us but it's not it shouldn't be like what i mean one one alien civilization might be like that communicating thoughts and others but i don't see why you know potential

## Step 4 - Generation

In [22]:
answer = llm.invoke(final_prompt)
print(answer.content)

Yes, the topic of aliens is discussed.  Here are some points:

* The possibility that some interactions we perceive as our own thoughts are actually originating from other life forms.
* The idea that consciousness itself might be coming from alien life forms.
* A question of why all alien species would communicate in the same way (through thoughts), suggesting a normal distribution of alien civilizations with varying levels of advancement and behavior (aggressive, curious, philosophical).
* The possibility that our reality is a hologram projected by aliens, similar to being in another universe.
* The possibility that alien communication methods are fundamentally different from ours and we are unable to understand them.
* The idea that our own thoughts could be a method of alien communication.


youtube chat done using rag 

# Now we will use chain. system to automate it


## first we will build parallel chain

In [23]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser 

In [24]:

def format_docs(retrieved_docs):
  context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
  return context_text

In [25]:
parallel_chain = RunnableParallel({
    'context': retriever | RunnableLambda(format_docs),
    'question': RunnablePassthrough()
})

In [26]:
parallel_chain.invoke('who is Demis')

{'context': "the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to interview people until i get

## we will buil another chain

In [27]:
parser = StrOutputParser()

In [28]:
main_chain = parallel_chain | prompt | llm | parser

In [29]:
main_chain.invoke('Can you summarize the video')

'The transcript discusses self-improvement, focusing on identifying strengths and weaknesses, combining them with passions, and making a difference.  It then shifts to a discussion about physics, mentioning the limitations of the standard model and the need for more fundamental explanations.  A suggestion is made to illustrate these concepts with diagrams or a step-by-step explanation.'