In [40]:
from youtube_transcript_api import YouTubeTranscriptApi , TranscriptsDisabled
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings

In [34]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [49]:
groq_api_key = os.getenv("GROQ_API_KEY")

In [35]:
os.environ["HuggingFace_API_KEY"] = os.getenv("HuggingFace_API_KEY")


In [12]:
video_id = "Gfr50f6ZBvo" # only the ID, not full URL

try:
    transcripts_list = YouTubeTranscriptApi.get_transcript(video_id , languages = ["en"])
    transcripts = "".join((chunk["text"]) for chunk in transcripts_list)
    print(transcripts)

except TranscriptsDisabled:
    print("No Transcripts are available")

the following is a conversation withdemus hasabisceo and co-founder of deepminda company that has published and buildssome of the most incredible artificialintelligence systems in the history ofcomputing including alfred zero thatlearnedall by itself to play the game of goldbetter than any human in the world andalpha fold two that solved proteinfoldingboth tasks considered nearly impossiblefor a very long timedemus is widely considered to be one ofthe most brilliant and impactful humansin the history of artificialintelligence and science and engineeringin generalthis was truly an honor and a pleasurefor me to finally sit down with him forthis conversation and i'm sure we willtalk many times again in the futurethis is the lex friedman podcast tosupport it please check out our sponsorsin the description and now dear friendshere's demishassabislet's start with a bit of a personalquestionam i an ai program you wrote tointerview people until i get good enoughto interview youwell i'll be imp

Text Spitting

In [30]:
splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap=200)
chunks = splitter.create_documents([transcripts])
len(chunks)

163

In [32]:
chunks[50]

Document(metadata={}, page_content="crystallize the protein which isreally difficult some proteins can't becrystallized like membrane proteins andthen you have to use very expensiveelectron microscopes or x-raycrystallography machines reallypainstaking work to get the 3d structureand visualize the 3d structure so therule of thumb in in experimental biologyis that it takes one phd student theirentire phd to do one protein uh and withalpha fold two we were able to predictthe 3d structure in a matter of secondsum and so we were you know overchristmas we did the whole humanproteome or every protein in the humanbody all 20 000 proteins so the humanproteins like the equivalent of thehuman genome but on protein space and uhand sort of revolutionize really what uha structural biologist can do becausenow um they don't have to worry aboutthese painstaking experimentals you knowshould they put all of that effort in ornot they can almost just look up thestructure of their proteins like agoogle sea

Embeddings

In [37]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm





In [None]:
vector_store = FAISS.from_documents(chunks , embeddings)


In [43]:
vector_store.index_to_docstore_id

{0: 'b3231b78-4a55-4e6a-aec8-3b11e149cdde',
 1: '5747fbbf-f587-41a8-891a-063c911aab89',
 2: 'e08261a2-45f9-4f68-8a39-ca3eb6c4ada3',
 3: '7951b99d-01f6-4e6f-9f85-ad1052835582',
 4: '20302780-698c-47ce-9e60-b7d56daaa6b6',
 5: '7287727f-fad0-480e-b50d-cb36d5703888',
 6: 'ef8f60cf-9280-447a-acb8-341f91024fe5',
 7: 'c395efba-571c-4ba4-97f5-5af3695691c4',
 8: '02fa7a91-a581-46f5-bb50-3687893324d8',
 9: '46f528fd-0dc9-4c1c-ad0b-91edcc070d81',
 10: 'e5df23c2-dc00-4e62-b6ba-3cf107734f07',
 11: '8c9ed83d-6b48-4718-84ee-552539b5a644',
 12: '15c4a815-be30-43fc-9bf5-00878fabdd4f',
 13: 'ba1f2a55-9ca0-431f-b4b0-d25d4a276a0d',
 14: '252ba7f0-d864-414d-a5b8-e155be1d6fac',
 15: 'f4508929-6b9b-42f4-a696-67229d9785d6',
 16: '120fb183-0da0-4ed6-a96e-387eaa4b33c6',
 17: '69a96daf-640e-4298-b252-9c74f92b2d99',
 18: '37f491ef-55af-4e98-a645-5af824592ccf',
 19: '13c727ac-5aa0-4b33-aaa5-ce89429de955',
 20: '65b73185-069c-4be1-a0d0-7b8d40de1556',
 21: 'feb5bfad-0fb1-450c-a56f-e55832854c64',
 22: '9de51f8a-243b-

In [45]:
print(vector_store.get_by_ids(["b3231b78-4a55-4e6a-aec8-3b11e149cdde"]))

[Document(id='b3231b78-4a55-4e6a-aec8-3b11e149cdde', metadata={}, page_content="the following is a conversation withdemus hasabisceo and co-founder of deepminda company that has published and buildssome of the most incredible artificialintelligence systems in the history ofcomputing including alfred zero thatlearnedall by itself to play the game of goldbetter than any human in the world andalpha fold two that solved proteinfoldingboth tasks considered nearly impossiblefor a very long timedemus is widely considered to be one ofthe most brilliant and impactful humansin the history of artificialintelligence and science and engineeringin generalthis was truly an honor and a pleasurefor me to finally sit down with him forthis conversation and i'm sure we willtalk many times again in the futurethis is the lex friedman podcast tosupport it please check out our sponsorsin the description and now dear friendshere's demishassabislet's start with a bit of a personalquestionam i an ai program you 

Retriever

In [46]:
retriever = vector_store.as_retriever(search_type = "similarity", search_kwargs = {"k":4})
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002669B1FAE00>, search_kwargs={'k': 4})

In [47]:
retriever.invoke("what is deepmind")

[Document(id='b3231b78-4a55-4e6a-aec8-3b11e149cdde', metadata={}, page_content="the following is a conversation withdemus hasabisceo and co-founder of deepminda company that has published and buildssome of the most incredible artificialintelligence systems in the history ofcomputing including alfred zero thatlearnedall by itself to play the game of goldbetter than any human in the world andalpha fold two that solved proteinfoldingboth tasks considered nearly impossiblefor a very long timedemus is widely considered to be one ofthe most brilliant and impactful humansin the history of artificialintelligence and science and engineeringin generalthis was truly an honor and a pleasurefor me to finally sit down with him forthis conversation and i'm sure we willtalk many times again in the futurethis is the lex friedman podcast tosupport it please check out our sponsorsin the description and now dear friendshere's demishassabislet's start with a bit of a personalquestionam i an ai program you 

Augmentation

In [60]:
llm = ChatGroq(groq_api_key = groq_api_key , model = "meta-llama/llama-4-maverick-17b-128e-instruct",temperature=0.4)
llm


ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000002669E169B40>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000002669E16AB00>, model_name='meta-llama/llama-4-maverick-17b-128e-instruct', temperature=0.4, model_kwargs={}, groq_api_key=SecretStr('**********'))

In [52]:
prompt = PromptTemplate(
    template=""""
    You are a helpful assistant.
    Answer only based on the transcript context provided.
    If context is insufficient , just say I don't know
    {context}
    question : {question}
    """,
    input_variables = ["context","question"]
)

In [62]:
question = "is the topic of nuclear fusion discussed in this video? if yes then what was discussed"
retrieved_docs = retriever.invoke(question)

In [63]:
retrieved_docs

[Document(id='a2f115e2-6e9a-4d0d-a81b-c235d87b64b2', metadata={}, page_content="and engineering challenges aswell to build these massive fusionreactors and contain the plasma and whatwe try to do whenever we go into a newfieldto apply our systems is we look for umwe talk to domain experts we try andfind the best people in the world tocollaborate with umin this case in fusion we wecollaborated with epfl in switzerlandthe swiss technical institute who areamazing they have a test reactor thatthey were willing to let us use whichyou know i double checked with the teamwe were going to use carefully andsafelyi was impressed they managed to persuadethem to let us use it and um and it's ait's an amazing test reactor they havethere and they try all sorts of prettycrazy experiments on it and um the thethe what we tend to look at is if we gointo a new domain like fusion what areall the bottleneck problems uh likethinking from first principles you knowwhat are all the bottleneck problemsthat are s

In [64]:
context = "\n\n".join(doc.page_content for doc in retrieved_docs)
context

"and engineering challenges aswell to build these massive fusionreactors and contain the plasma and whatwe try to do whenever we go into a newfieldto apply our systems is we look for umwe talk to domain experts we try andfind the best people in the world tocollaborate with umin this case in fusion we wecollaborated with epfl in switzerlandthe swiss technical institute who areamazing they have a test reactor thatthey were willing to let us use whichyou know i double checked with the teamwe were going to use carefully andsafelyi was impressed they managed to persuadethem to let us use it and um and it's ait's an amazing test reactor they havethere and they try all sorts of prettycrazy experiments on it and um the thethe what we tend to look at is if we gointo a new domain like fusion what areall the bottleneck problems uh likethinking from first principles you knowwhat are all the bottleneck problemsthat are still stopping fusion workingtoday and then we look at we you know weget a\n\nou

In [65]:
final_prompt = prompt.invoke({"context":context , "question":question})
final_prompt

StringPromptValue(text='"\n    You are a helpful assistant.\n    Answer only based on the transcript context provided.\n    If context is insufficient , just say I don\'t know\n    and engineering challenges aswell to build these massive fusionreactors and contain the plasma and whatwe try to do whenever we go into a newfieldto apply our systems is we look for umwe talk to domain experts we try andfind the best people in the world tocollaborate with umin this case in fusion we wecollaborated with epfl in switzerlandthe swiss technical institute who areamazing they have a test reactor thatthey were willing to let us use whichyou know i double checked with the teamwe were going to use carefully andsafelyi was impressed they managed to persuadethem to let us use it and um and it\'s ait\'s an amazing test reactor they havethere and they try all sorts of prettycrazy experiments on it and um the thethe what we tend to look at is if we gointo a new domain like fusion what areall the bottlenec

Generation

In [66]:
answer = llm.invoke(final_prompt)
answer

AIMessage(content='Yes, the topic of nuclear fusion is discussed in this transcript. \n\nThe discussion revolves around the application of Artificial Intelligence (AI) and Deep Reinforcement Learning (Deep RL) to solve challenges in nuclear fusion, particularly in controlling high-temperature plasmas. The speaker mentions their collaboration with EPFL (Swiss Technical Institute) and their work on a test reactor. They highlight that one of the significant challenges in fusion is containing the plasma in specific shapes, and they claim to have made progress in this area using Deep RL. The speaker published a paper in Nature on this work and is now exploring further problems to tackle in the fusion area with various fusion startups.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 130, 'prompt_tokens': 888, 'total_tokens': 1018, 'completion_time': 0.306125668, 'prompt_time': 0.017872828, 'queue_time': 0.048303361999999996, 'total_time': 0.323998496}, 'model_

In [67]:
answer.content

'Yes, the topic of nuclear fusion is discussed in this transcript. \n\nThe discussion revolves around the application of Artificial Intelligence (AI) and Deep Reinforcement Learning (Deep RL) to solve challenges in nuclear fusion, particularly in controlling high-temperature plasmas. The speaker mentions their collaboration with EPFL (Swiss Technical Institute) and their work on a test reactor. They highlight that one of the significant challenges in fusion is containing the plasma in specific shapes, and they claim to have made progress in this area using Deep RL. The speaker published a paper in Nature on this work and is now exploring further problems to tackle in the fusion area with various fusion startups.'

In [70]:
from langchain_core.output_parsers import StrOutputParser

In [77]:
parser =  StrOutputParser()

In [78]:
chain = prompt | llm | parser

In [83]:
chain.invoke({"context":context , "question":"can you summarise the video"})

'The speaker discusses their work on applying AI to nuclear fusion, specifically using deep reinforcement learning to control high-temperature plasmas. They collaborated with EPFL in Switzerland, who have a test reactor, and were able to successfully control the plasma in specific shapes, a major challenge in fusion. They published their work in a Nature paper and are now exploring other problems in the fusion area with various startups. The speaker believes that AI can help accelerate progress in energy and climate, and that fusion is one area where AI can make a significant impact, alongside disease and biology. They also mention the potential for AI to help discover new materials and optimize systems, such as batteries.'