In [17]:
!pip install -q langchain-community \
               langchain-huggingface \
               faiss-cpu \
               sentence-transformers \
               transformers \
               python-dotenv \
               youtube-transcript-api

In [18]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
import gradio as gr
from langchain.schema.runnable import RunnablePassthrough
import re
import os
from langchain_core.output_parsers import StrOutputParser

In [19]:
def get_video_id(url):
    if "v=" in url:
        return url.split("v=")[1].split("&")[0]
    elif "youtu.be" in url:
        return url.split("/")[-1]
    return None


In [20]:
def get_video_id(url):
    match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11})", url)
    return match.group(1) if match else url.strip()

In [21]:
from youtube_transcript_api import YouTubeTranscriptApi
print(dir(YouTubeTranscriptApi))  # should now have get_transcript

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'fetch', 'list']


In [22]:
from youtube_transcript_api import YouTubeTranscriptApi

yt = YouTubeTranscriptApi()
available_transcripts = yt.list("Ks-_Mh1QhMc")
print(available_transcripts)

For this video (Ks-_Mh1QhMc) transcripts are available in the following languages:

(MANUALLY CREATED)
 - sq ("Albanian")[TRANSLATABLE]
 - ar ("Arabic")[TRANSLATABLE]
 - hy ("Armenian")[TRANSLATABLE]
 - az ("Azerbaijani")[TRANSLATABLE]
 - bg ("Bulgarian")[TRANSLATABLE]
 - my ("Burmese")[TRANSLATABLE]
 - ca ("Catalan")[TRANSLATABLE]
 - zh-CN ("Chinese (China)")[TRANSLATABLE]
 - zh-TW ("Chinese (Taiwan)")[TRANSLATABLE]
 - hr ("Croatian")[TRANSLATABLE]
 - cs ("Czech")[TRANSLATABLE]
 - da ("Danish")[TRANSLATABLE]
 - nl ("Dutch")[TRANSLATABLE]
 - en ("English")[TRANSLATABLE]
 - et ("Estonian")[TRANSLATABLE]
 - fi ("Finnish")[TRANSLATABLE]
 - fr ("French")[TRANSLATABLE]
 - fr-CA ("French (Canada)")[TRANSLATABLE]
 - gl ("Galician")[TRANSLATABLE]
 - ka ("Georgian")[TRANSLATABLE]
 - de ("German")[TRANSLATABLE]
 - el ("Greek")[TRANSLATABLE]
 - iw ("Hebrew")[TRANSLATABLE]
 - hu ("Hungarian")[TRANSLATABLE]
 - id ("Indonesian")[TRANSLATABLE]
 - it ("Italian")[TRANSLATABLE]
 - ja ("Japanese")[TRANSL

In [23]:
transcript_dicts = [
    {"start": e.start, "duration": e.duration, "text": e.text}
    for e in yt.fetch("Ks-_Mh1QhMc", languages=['en'])
]
print(transcript_dicts[:5])

[{'start': 0.0, 'duration': 7.0, 'text': 'Translator: Joseph Geni\nReviewer: Morton Bast'}, {'start': 15.967, 'duration': 5.398, 'text': 'So I want to start by offering you\na free no-tech life hack,'}, {'start': 21.389, 'duration': 2.597, 'text': 'and all it requires of you is this:'}, {'start': 24.01, 'duration': 4.163, 'text': 'that you change your posture\nfor two minutes.'}, {'start': 28.197, 'duration': 3.4, 'text': 'But before I give it away,\nI want to ask you to right now'}]


In [24]:
from youtube_transcript_api import YouTubeTranscriptApi

yt = YouTubeTranscriptApi()

def get_transcript_dict(video_id, lang='en'):
    return [
        {"start": e.start, "duration": e.duration, "text": e.text}
        for e in yt.fetch(video_id, languages=[lang])
    ]

# Example:
transcript = get_transcript_dict("Ks-_Mh1QhMc")

In [25]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Join transcript entries into one big string
full_text = " ".join(entry["text"] for entry in transcript)

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
chunks = splitter.create_documents([full_text])

print(chunks[0].page_content)  # Check first chunk

Translator: Joseph Geni
Reviewer: Morton Bast So I want to start by offering you
a free no-tech life hack, and all it requires of you is this: that you change your posture
for two minutes. But before I give it away,
I want to ask you to right now do a little audit of your body
and what you're doing with your body. So how many of you are
sort of making yourselves smaller? Maybe you're hunching, crossing your legs,
maybe wrapping your ankles. Sometimes we hold onto our arms like this. Sometimes we spread out. (Laughter) I see you. So I want you to pay attention
to what you're doing right now. We're going to come back
to that in a few minutes, and I'm hoping that if you learn
to tweak this a little bit, it could significantly change
the way your life unfolds. So, we're really fascinated
with body language, and we're particularly interested
in other people's body language. You know, we're interested in,


In [26]:
len(chunks)

24

## Step 1c & 1d - Indexing (Embedding Generation and Storing in Vector Store)



In [27]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [28]:
vector_store = FAISS.from_documents(chunks, embeddings)

In [29]:
vector_store.index_to_docstore_id

{0: 'acfbdc00-47ed-4f97-9036-3a862d0cf753',
 1: 'd3d94231-1274-4f74-8fe8-82a3e7b36af4',
 2: '372a2ea1-164b-4241-84d1-883f93f46486',
 3: 'a0a310a2-9823-4fee-a16c-e46c25471606',
 4: '793cb13b-8e35-4c7f-b094-8c66e3e62eba',
 5: '0876d7cf-5e31-45db-bbb5-a13486ebd8f0',
 6: '83495bc5-bad5-44c9-a6a9-818e8f507261',
 7: 'bcde53f9-3416-427f-96da-ae89bcc17f8b',
 8: 'c9bd7f90-f948-4851-98e6-1c09980f9a93',
 9: 'fbce1c5d-4364-4773-b4b2-00ded8d446ad',
 10: '05b020dd-1c03-45f4-91b2-da6e9c9bd204',
 11: '57a6fb79-6bf7-4455-a3a0-5ce55c58e94d',
 12: '09b8d0e5-9694-483b-8f14-6cdf8efe0394',
 13: '060d209b-9a6f-4499-98d3-38139c51b905',
 14: 'b5f59a6f-2eef-4150-b134-457ff001e4a3',
 15: '9090ac08-7126-400e-b54d-9e84714991e4',
 16: '18f04c87-8eb6-45c1-8895-8e7099f1258e',
 17: '8c8bb16b-3aa8-459d-9fa6-27501f319d74',
 18: 'e001cef1-6182-4ea2-85f6-c197e263a0f6',
 19: 'f9af39f8-4eea-4dae-b333-6cee913b30c8',
 20: 'c467ea4c-a5fd-4f62-a556-0f9ed709fcd9',
 21: 'e8fb10b5-6c80-454c-8099-d84a553eb87f',
 22: '19a48965-f5f2-

In [30]:
vector_store.get_by_ids(['c0cc193e-bfc2-4329-9d33-fd2d4d46a131'])

[]

In [31]:
cached_vector_store = {}


In [32]:
def get_vector_store(video_id, chunks, embeddings):
    if video_id not in cached_vector_store:
        cached_vector_store[video_id] = FAISS.from_documents(chunks, embeddings)
    return cached_vector_store[video_id]

In [33]:
vector_store = get_vector_store("Ks-_Mh1QhMc", chunks, embeddings)


# Step 2 - Retrieval


In [34]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

In [35]:
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7e1e57387bd0>, search_kwargs={'k': 4})

# Step 3 - Augmentation

In [45]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
import os

os.environ["HUGGINGFACEHUB_API_TOKEN"] = ""

# Use a model that works well via Hugging Face Inference API
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",  # or any from the table
    task="text-generation"
)

# Wrap for chat-style usage
model = ChatHuggingFace(llm=llm)

In [46]:
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are an expert assistant that answers questions based on a YouTube transcript.

Here is the transcript chunk:\n\n{context}

Answer the following question as clearly and specifically as possible:
{question}
"""
)

In [47]:
question          = "is the topic of nuclear fusion discussed in this video? if yes then what was discussed"
retrieved_docs    = retriever.invoke(question)

In [48]:
retrieved_docs

[Document(id='9f35d186-a70e-42ab-91a9-e0291a775c9e', metadata={}, page_content='that individual\'s testosterone has gone up significantly and his cortisol\nhas dropped significantly. So we have this evidence,\nboth that the body can shape the mind, at least at the facial level, and also that role changes\ncan shape the mind. So what happens, okay,\nyou take a role change, what happens if you do that\nat a really minimal level, like this tiny manipulation,\nthis tiny intervention? "For two minutes," you say,\n"I want you to stand like this, and it\'s going to make you feel\nmore powerful." So this is what we did. We decided to bring people into the lab\nand run a little experiment, and these people adopted, for two minutes, either high-power poses\nor low-power poses, and I\'m just going to show\nyou five of the poses, although they took on only two. So here\'s one. A couple more. This one has been dubbed\nthe "Wonder Woman" by the media. Here are a couple more. So you can be standing\n

In [49]:
context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
context_text

'that individual\'s testosterone has gone up significantly and his cortisol\nhas dropped significantly. So we have this evidence,\nboth that the body can shape the mind, at least at the facial level, and also that role changes\ncan shape the mind. So what happens, okay,\nyou take a role change, what happens if you do that\nat a really minimal level, like this tiny manipulation,\nthis tiny intervention? "For two minutes," you say,\n"I want you to stand like this, and it\'s going to make you feel\nmore powerful." So this is what we did. We decided to bring people into the lab\nand run a little experiment, and these people adopted, for two minutes, either high-power poses\nor low-power poses, and I\'m just going to show\nyou five of the poses, although they took on only two. So here\'s one. A couple more. This one has been dubbed\nthe "Wonder Woman" by the media. Here are a couple more. So you can be standing\nor you can be sitting. And here are the low-power poses. So you\'re folding up,

In [50]:
final_prompt = prompt.invoke({"context": context_text, "question": question})

In [51]:
final_prompt

StringPromptValue(text='\nYou are an expert assistant that answers questions based on a YouTube transcript.\n\nHere is the transcript chunk:\n\nthat individual\'s testosterone has gone up significantly and his cortisol\nhas dropped significantly. So we have this evidence,\nboth that the body can shape the mind, at least at the facial level, and also that role changes\ncan shape the mind. So what happens, okay,\nyou take a role change, what happens if you do that\nat a really minimal level, like this tiny manipulation,\nthis tiny intervention? "For two minutes," you say,\n"I want you to stand like this, and it\'s going to make you feel\nmore powerful." So this is what we did. We decided to bring people into the lab\nand run a little experiment, and these people adopted, for two minutes, either high-power poses\nor low-power poses, and I\'m just going to show\nyou five of the poses, although they took on only two. So here\'s one. A couple more. This one has been dubbed\nthe "Wonder Woman

# Step 4 - Generation

In [52]:

answer = model.invoke(final_prompt)
print(answer.content)

No, the topic of nuclear fusion is not discussed in this video. The transcript appears to be from a lecture or presentation about social psychology, specifically the topic of power dynamics and nonverbal behavior. The discussion centers around the concept of "power posing," the idea that adopting certain body postures can affect a person's feelings of power and confidence, and how this concept relates to participation and performance in a business school classroom.


In [53]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [54]:
def format_docs(retrieved_docs):
  context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
  return context_text

In [55]:
parallel_chain = RunnableParallel({
    'context': retriever | RunnableLambda(format_docs),
    'question': RunnablePassthrough()
})

In [56]:
parallel_chain.invoke('who is Demis')

{'context': 'Translator: Joseph Geni\nReviewer: Morton Bast So I want to start by offering you\na free no-tech life hack, and all it requires of you is this: that you change your posture\nfor two minutes. But before I give it away,\nI want to ask you to right now do a little audit of your body\nand what you\'re doing with your body. So how many of you are\nsort of making yourselves smaller? Maybe you\'re hunching, crossing your legs,\nmaybe wrapping your ankles. Sometimes we hold onto our arms like this. Sometimes we spread out. (Laughter) I see you. So I want you to pay attention\nto what you\'re doing right now. We\'re going to come back\nto that in a few minutes, and I\'m hoping that if you learn\nto tweak this a little bit, it could significantly change\nthe way your life unfolds. So, we\'re really fascinated\nwith body language, and we\'re particularly interested\nin other people\'s body language. You know, we\'re interested in,\n\nbut for a long time I had been thinking, "Not sup

In [57]:
parser = StrOutputParser()

In [58]:
main_chain = parallel_chain | prompt | model | parser

In [59]:
main_chain.invoke('what is the Conversation Going on in the video')

"The conversation in the video appears to be a mix of Amy Cuddy's explanations about nonverbal behavior and body language, along with a discussion about a specific study she conducted on the effects of power posing on job interview outcomes.\n\nMore specifically, Amy Cuddy is explaining to the audience how her study found that individuals who adopted high-power poses before a job interview were perceived as more competent and likable by the interviewers, whereas those who adopted low-power poses were not.\n\nShe also discusses the importance of nonverbal communication and how it can affect our interactions with others. Additionally, she addresses a common misconception about her research, where people mistakenly assume that her study suggests adopting high-power poses before a job interview is a way to fake confidence. Instead, she emphasizes that the study is about the internal, psychological effect of power posing, rather than its external, social effects."

In [60]:
def answer_from_youtube(url, question):
    video_id = get_video_id(url)
    if not video_id:
        return "Invalid YouTube URL."

    yt = YouTubeTranscriptApi()

    try:
        # Fetch transcript and convert to dict format
        transcript = [
            {"start": e.start, "duration": e.duration, "text": e.text}
            for e in yt.fetch(video_id, languages=['en'])
        ]
    except Exception as e:
        return f"Error fetching transcript: {e}"

    if not transcript:
        return "No transcript available for this video."

    # Join text and split into chunks
    full_text = " ".join(entry["text"] for entry in transcript)
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
    chunks = splitter.create_documents([full_text])

    # Embed and create FAISS vector store
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.from_documents(chunks, embeddings)
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

    # Retrieve relevant chunks
    retrieved_docs = retriever.invoke(question)
    context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)

    # Create prompt and get answer
    final_prompt = prompt.invoke({"context": context_text, "question": question})
    answer = model.invoke(final_prompt)

    return answer.content

# Build Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# YouTube Transcript QA Chatbot")
    url_input = gr.Textbox(label="YouTube Video URL", placeholder="Enter YouTube video URL here")
    question_input = gr.Textbox(label="Question", placeholder="Ask something about the video")
    output = gr.Textbox(label="Answer", interactive=False)

    btn = gr.Button("Get Answer")
    btn.click(fn=answer_from_youtube, inputs=[url_input, question_input], outputs=output)

demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9c15af538b42528cec.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [61]:
import time

# Before optimization (e.g., no caching)
start = time.time()

# Run your slow function here (e.g., build vector store without cache)
vector_store = FAISS.from_documents(chunks, embeddings)

end = time.time()
original_latency = end - start
print(f"Original latency: {original_latency:.2f} seconds")

# After optimization (e.g., with caching)
start = time.time()

# Run your optimized function (e.g., get_vector_store which uses cache)
vector_store = get_vector_store("Ks-_Mh1QhMc", chunks, embeddings)

end = time.time()
optimized_latency = end - start
print(f"Optimized latency: {optimized_latency:.2f} seconds")

# Calculate % latency reduction
latency_reduction = ((original_latency - optimized_latency) / original_latency) * 100
print(f"Latency reduced by: {latency_reduction:.2f}%")


Original latency: 3.27 seconds
Optimized latency: 0.00 seconds
Latency reduced by: 100.00%
