In [None]:
import os
from getpass import getpass

# Never hardcode the key in the notebook: reuse GOOGLE_API_KEY when the
# environment already provides it, otherwise prompt for it without echoing.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Google API key: ")

In [None]:
# %pip install -q youtube-transcript-api langchain-community langchain-google-genai google-generativeai faiss-cpu tiktoken python-dotenv langchain langchain-text-splitters

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate



In [6]:
# video_id = "fh2dBmLN-ZM" # only the ID, not full URL
# try:
#     # Request the English transcript (languages=["en"])
#     transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])

#     # Flatten it to plain text
#     transcript = " ".join(chunk["text"] for chunk in transcript_list)
#     print(transcript)

# except TranscriptsDisabled:
#     print("No captions available for this video.")

In [None]:
video_id = "K4Ze-Sp6aUE"

try:
    # Instance-based API: fetch() retrieves the English transcript and
    # to_raw_data() converts it to a list of dicts with 'text', 'start'
    # and 'duration' keys.
    api = YouTubeTranscriptApi()
    transcript_list = api.fetch(video_id, languages=["en"]).to_raw_data()
except TranscriptsDisabled as exc:
    # Fail loudly: every later cell needs `transcript`, and merely printing
    # the error would leave it undefined (or stale from an earlier run).
    raise RuntimeError(f"No captions available for video {video_id!r}.") from exc

# Join the text parts into one string
transcript = " ".join(snippet["text"] for snippet in transcript_list)
print("Transcript fetched successfully!")
print(transcript[:500])

Transcript fetched successfully!
ANDREW HUBERMAN: Welcome to
the Huberman Lab podcast, where we discuss science
and science-based tools for everyday life. I'm Andrew Huberman,
and I'm a professor of neurobiology
and ophthalmology at Stanford School of Medicine. Today, my guest is
Dr. Layne Norton. Dr. Norton is one of
the foremost experts in protein metabolism,
fat loss, and nutrition. He did his degrees in
biochemistry and nutritional sciences and is considered
one of the world experts in understanding how we
extract energy fr


In [8]:
# Peek at the first 20 raw snippets (dicts with 'text', 'start', 'duration').
transcript_list[:20]

[{'text': 'ANDREW HUBERMAN: Welcome to\nthe Huberman Lab podcast,',
  'start': 0.0,
  'duration': 2.22},
 {'text': 'where we discuss science\nand science-based tools',
  'start': 2.22,
  'duration': 2.67},
 {'text': 'for everyday life.', 'start': 4.89, 'duration': 0.78},
 {'text': "I'm Andrew Huberman,\nand I'm a professor",
  'start': 9.13,
  'duration': 1.77},
 {'text': 'of neurobiology\nand ophthalmology',
  'start': 10.9,
  'duration': 2.16},
 {'text': 'at Stanford School of Medicine.', 'start': 13.06, 'duration': 1.68},
 {'text': 'Today, my guest is\nDr. Layne Norton.',
  'start': 14.74,
  'duration': 2.28},
 {'text': 'Dr. Norton is one of\nthe foremost experts',
  'start': 17.02,
  'duration': 2.1},
 {'text': 'in protein metabolism,\nfat loss, and nutrition.',
  'start': 19.12,
  'duration': 3.09},
 {'text': 'He did his degrees in\nbiochemistry and nutritional',
  'start': 22.21,
  'duration': 2.64},
 {'text': 'sciences and is considered\none of the world experts',
  'start': 24.

In [9]:
# Peek at the last 20 raw snippets to see where the transcript ends.
transcript_list[-20:]

[{'text': "And if you're not already\nfollowing us on social media,",
  'start': 13728.41,
  'duration': 2.5},
 {'text': 'we are Huberman\nLab on Instagram,',
  'start': 13730.91,
  'duration': 2.06},
 {'text': 'Huberman Lab on Twitter, and\nHuberman Lab on Facebook.',
  'start': 13732.97,
  'duration': 2.76},
 {'text': 'And at all of those\nsites, I provide',
  'start': 13735.73,
  'duration': 2.28},
 {'text': 'science and science-related\ntools for mental health,',
  'start': 13738.01,
  'duration': 2.422},
 {'text': 'physical health, and\nperformance, some of which',
  'start': 13740.432,
  'duration': 1.958},
 {'text': 'overlap with information covered\non the Huberman Lab podcast,',
  'start': 13742.39,
  'duration': 2.88},
 {'text': 'but often which is distinct\nfrom information covered',
  'start': 13745.27,
  'duration': 2.76},
 {'text': 'on the Huberman Lab podcast.', 'start': 13748.03, 'duration': 1.23},
 {'text': "So again, that's Huberman\nLab on Instagram, Twitter,",
  'st

---

# Chunking


In [None]:
# Cut the transcript into chunks of at most 1000 characters, with a
# 200-character overlap between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunk = splitter.create_documents([transcript])

In [11]:
# Number of chunks produced by the splitter.
len(chunk)

290

---


# Embedding generation & storing in vector store


In [None]:
# Embed every chunk with the Google embedding model and index the vectors
# in a FAISS vector store.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
)
vector_store = FAISS.from_documents(documents=chunk, embedding=embeddings)

In [None]:
# Show the last 10 (index position, docstore ID) pairs of the store.
list(vector_store.index_to_docstore_id.items())[-10:]

[(280, '9c2176e1-be9d-44b0-836d-f3ed21bc9a4a'),
 (281, '1783d87e-3952-4603-990b-b7729c3357be'),
 (282, '857b3a17-9407-430b-bb16-66933558731d'),
 (283, '1e2a747e-ac7b-4cfe-b784-a82461534ed4'),
 (284, '55400379-0834-48a7-879e-3af9c8378d4e'),
 (285, '32694026-b1cc-4387-8ed5-8bd4acb90f9b'),
 (286, '9b1d25ce-27e1-413d-ab97-3a790f424030'),
 (287, '30334820-ebf0-4984-ab9f-1809a9262c2e'),
 (288, '726feaf1-0ba3-41ff-bc36-901ffa98e388'),
 (289, '88b631ec-128f-4d22-bfb9-d3bdcccded84')]

In [None]:
# A docstore ID copied from an earlier run is not guaranteed to exist in a
# freshly built index (the lookup then returns []), so read the ID of the
# last indexed chunk from the live mapping instead of hardcoding a UUID.
last_doc_id = vector_store.index_to_docstore_id[max(vector_store.index_to_docstore_id)]
vector_store.get_by_ids([last_doc_id])

[]

---


# Retrieval


In [None]:
# Similarity search over the vector store, returning the 4 closest chunks.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [16]:
# Display the retriever's configuration.
retriever

VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7f5afd651040>, search_kwargs={'k': 4})

In [17]:
# Smoke-test retrieval with a sample query; the result is a list of Document objects.
retriever.invoke("what is protein folding")

[Document(id='44898221-42a6-4437-9be5-f08e2d204cd1', metadata={}, page_content="familiar, this is part of the mTOR signaling pathway. So one of the--\ntwo of the targets of mTOR, when it's stimulated--\nand leucine stimulates mTOR. Two of the targets of mTOR are a\nprotein compound called 4E-BP1. And then another one is called\na ribosomal protein S6K. So I don't want to get\ninto the specifics about it because it's going\nto be on the scope. But basically, when these things\nare phosphorylated by mTOR, it increases the rate of\ntranslation initiation, which translation initiation\nis, basically, the process of the ribosome\nhooking on to the mRNA and then starting\nprotein synthesis. So I was looking at\nthe phosphorylation of 4E-BP1 and RPS6. I was like, OK,\nwell, I'll probably see these things come\ndown in three hours. Still plateaued. And so then it was like,\nwhat's going on here? So I actually kept rerunning\nthe data and rerunning the data and rerunning the data. And I'll neve

---


# Augmentation


In [None]:
# Gemini model used to generate the final answer.
llm = GoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.4,
)

In [None]:
# Prompt that restricts the model to the retrieved transcript context and
# asks it to admit when that context is insufficient.
prompt1 = PromptTemplate(
    input_variables=["context", "user_question"],
    template=(
        "You are a helpful assistant. Answer only from the provided transcript content."
        "If the content is insufficient just say I dont know. "
        "{context}, Question={user_question}"
    ),
)

In [None]:
# Retrieve the chunks most similar to the question.
question = "is the topic of protein discussed in the question if yes then what is discussed"
retrieved_docs = retriever.invoke(question)

In [None]:
# Merge the text of the retrieved chunks into one context string,
# separating chunks with a blank line.
chunk_texts = [document.page_content for document in retrieved_docs]
content_text = "\n\n".join(chunk_texts)

In [None]:
# Fill the template with the joined chunk text (content_text) rather than
# the raw Document list, whose repr would otherwise be pasted into the prompt.
final_prompt = prompt1.invoke({"context": content_text, "user_question": question})

---


# Generation


In [None]:
# Send the filled-in prompt to the model.
result = llm.invoke(final_prompt)

In [41]:
# Display the model's answer.
result

'Yes, the topic of protein is discussed in the transcript.\n\nHere\'s what is discussed about protein:\n\n*   **Storage Capacity:** Protein has almost no storage capacity, unlike fat (unlimited) and carbohydrates (large capacity). This leads to questioning the idea of making up for low protein at one meal by overconsuming at another.\n*   **Studies:** A study in rats involving whey protein (a high-quality protein) is mentioned, where both groups received the same amount of calories.\n*   **Plant-Based Options:** Leucine and other essential amino acids are noted as options for plant-based individuals, and it\'s stated that plant-based people can build impressive amounts of muscle.\n*   **Dietary Importance:** Protein is considered a core component of a diet and the "biggest lever" among macronutrients.\n*   **Satiety Signals:** The relationship between protein and satiety signals is brought up.\n*   **Bioavailability:** The bioavailability of animal sources of protein for muscle buildin

---


# With Chain


In [None]:
# Same template as the step-by-step walkthrough, declared again here so the
# chain section can be read on its own.
prompt1 = PromptTemplate(
    input_variables=["context", "user_question"],
    template=(
        "You are a helpful assistant. Answer only from the provided transcript content."
        "If the content is insufficient just say I dont know. "
        "{context}, Question={user_question}"
    ),
)

In [None]:
# Retriever for the chain: similarity search returning the 4 closest chunks.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [None]:
from langchain_core.runnables import (
    RunnableParallel,
    RunnablePassthrough,
    RunnableLambda,
)
from langchain_core.output_parsers import StrOutputParser

# Output parser used as the final step of the chain.
parser = StrOutputParser()

In [None]:
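# Chain layout (two parallel branches fed by the same question):
#   question -> RunnablePassthrough      -> "user_question"
#   question -> retriever -> format_docs -> "context"
# The resulting dict is then piped through prompt | llm | parser.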

In [None]:
def format_docs(retrieved_docs):
    """Join the page_content of the given documents into one string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The documents' texts separated by a blank line ("\\n\\n").
    """
    texts = [document.page_content for document in retrieved_docs]
    return "\n\n".join(texts)

In [None]:
# Fan the incoming question out into the two variables the prompt expects:
# "context" (retrieved chunks joined into text) and "user_question" (unchanged).
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    user_question=RunnablePassthrough(),
)

In [48]:
# Inspect the dict that the parallel step hands to the prompt.
parallel_chain.invoke("how much protein is sufficient for body")

{'context': "to get a lot of the muscle building benefits. I mean, I think\nthe benefits really start to plateau out around\n1.6 grams per kilogram of body weight. There's some evidence\nthat maybe even up to like 2.4 or\n2.8 grams per kilo may give a little\nbit more benefit. I think it probably\nlooks something like an asymptote\nin terms of a curve where as you put\nmore into the system, you always get a\nlittle bit more, but it just gets to the point\nwhere it's so infinitesimally small benefit that it's for all\nintensive purposes, no benefit. ANDREW HUBERMAN: But you\nmentioned 1.6 grams of protein per kilogram of body\nweight, would you consider that a threshold\nthat most people should try and achieve daily? LAYNE NORTON: I see very few\ndownsides to hitting that. I mean, I know some\npeople-- and this is going to get into a\nseparate conversation. But I know some\npeople will say, well, I don't want to stimulate\nmTOR because that's going to make me die early. And I think, one

In [None]:
# End-to-end RAG chain: question in, answer text out.
chain1 = (
    parallel_chain  # question -> {"context", "user_question"}
    | prompt1  # fill the prompt template
    | llm  # generate the answer
    | parser  # parse the model output
)

In [53]:
# On-topic question: the answer should come from the retrieved transcript chunks.
chain1.invoke("how much protein should a person eat normally")

"The benefits of muscle building start to plateau around 1.6 grams per kilogram of body weight. There is some evidence that up to 2.4 or 2.8 grams per kilo may give a little more benefit, but it's an infinitesimally small benefit. Layne Norton sees very few downsides to hitting 1.6 grams of protein per kilogram of body weight daily."

In [51]:
# Off-topic question: the prompt tells the model to say it doesn't know
# when the retrieved context is insufficient.
chain1.invoke("who is sundar pichhai")

"I don't know."

In [None]:
# Second off-topic check of the "I don't know" fallback.
chain1.invoke("what is google")

"I don't know."

In [54]:
# Show the context that retrieval supplies for the protein-intake question.
parallel_chain.invoke("how much protein should a person eat normally")

{'context': "to get a lot of the muscle building benefits. I mean, I think\nthe benefits really start to plateau out around\n1.6 grams per kilogram of body weight. There's some evidence\nthat maybe even up to like 2.4 or\n2.8 grams per kilo may give a little\nbit more benefit. I think it probably\nlooks something like an asymptote\nin terms of a curve where as you put\nmore into the system, you always get a\nlittle bit more, but it just gets to the point\nwhere it's so infinitesimally small benefit that it's for all\nintensive purposes, no benefit. ANDREW HUBERMAN: But you\nmentioned 1.6 grams of protein per kilogram of body\nweight, would you consider that a threshold\nthat most people should try and achieve daily? LAYNE NORTON: I see very few\ndownsides to hitting that. I mean, I know some\npeople-- and this is going to get into a\nseparate conversation. But I know some\npeople will say, well, I don't want to stimulate\nmTOR because that's going to make me die early. And I think, one

In [55]:
# Re-run of the same question; with temperature=0.4 the wording of the
# answer can differ between runs.
chain1.invoke("how much protein should a person eat normally")

'A person should aim for around 1.6 grams of protein per kilogram of body weight daily. The benefits for muscle building start to plateau at this amount. There\'s some evidence that up to 2.4 or 2.8 grams per kilo may offer a "little bit more benefit," but this benefit is described as "infinitesimally small." Layne Norton sees "very few downsides" to hitting the 1.6 grams per kilogram threshold daily.'