In [None]:
!pip install transformers sentence-transformers langchain faiss-cpu gradio youtube-transcript-api
!pip install langchain-community langchain
!pip install --upgrade langchain
#pip show langchain
!pip show youtube-transcript-api
!pip install --upgrade youtube-transcript-api



Name: youtube-transcript-api
Version: 1.2.3
Summary: This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. It also works for automatically generated subtitles, supports translating subtitles and it does not require a headless browser, like other selenium based solutions do!
Home-page: https://github.com/jdepoix/youtube-transcript-api
Author: Jonas Depoix
Author-email: jonas.depoix@web.de
License: MIT
Location: /usr/local/lib/python3.12/dist-packages
Requires: defusedxml, requests
Required-by: 


In [None]:
import gradio as gr
import re
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFacePipeline
#from langchain_community.chains import LLMChain
from langchain_core.runnables import RunnableSequence
#from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
from transformers import pipeline
from sentence_transformers import SentenceTransformer

# Extract YouTube video ID from URL
def get_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    The ID is searched for after ``v=`` (``watch?v=<id>``) or after a ``/``
    (``youtu.be/<id>``, ``/embed/<id>``, ``/shorts/<id>``).

    Args:
        url: The URL text to search. Non-string or empty values are treated
            as invalid rather than raising.

    Returns:
        The video ID string, or ``None`` when no ID can be found.
    """
    if not isinstance(url, str) or not url:
        return None
    # The negative lookahead forces the candidate to be exactly 11 characters
    # long, so the first 11 characters of a longer token (a channel ID, a
    # path segment such as "attribution_link") are not mistaken for an ID.
    pattern = r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
    match = re.search(pattern, url)
    return match.group(1) if match else None

# Retrieve English transcript (manual preferred, falls back to auto-generated)
def get_transcript(url):
    """Fetch the English transcript of the YouTube video referenced by *url*.

    Args:
        url: A YouTube video URL; the video ID is extracted with
            ``get_video_id``.

    Returns:
        A ``(transcript, error)`` tuple. On success ``transcript`` is the
        list returned by ``to_raw_data()`` (entries are read downstream via
        their ``'text'`` and ``'start'`` keys) and ``error`` is ``None``.
        On failure ``transcript`` is ``None`` and ``error`` is a message
        suitable for showing to the user.
    """
    video_id = get_video_id(url)
    if not video_id:
        return None, "Invalid YouTube URL."
    try:
        # NOTE(review): per the youtube-transcript-api docs, fetch() picks a
        # manually created transcript over an auto-generated one when both
        # exist for the requested language.
        fetched = YouTubeTranscriptApi().fetch(video_id, languages=["en"])
        transcript_list = fetched.to_raw_data()
    except Exception as e:
        # UI boundary: report any library/network failure as text instead of
        # letting it propagate into the Gradio handler.
        return None, f"Error fetching transcript: {str(e)}"
    if not transcript_list:
        return None, "No English transcript available for this video."
    return transcript_list, None

# Process transcript into text block
def process(transcript):
    """Flatten transcript entries into one newline-terminated text block.

    Args:
        transcript: Iterable of mappings, each providing ``'text'`` and
            ``'start'`` keys. ``None`` or an empty sequence yields ``""``.

    Returns:
        A string with one ``"Text: <text> Start: <start>"`` line per entry,
        each line terminated by a newline.
    """
    if not transcript:
        return ""
    # str.join builds the result in one pass instead of repeated `+=`.
    return "".join(
        f"Text: {snippet['text']} Start: {snippet['start']}\n"
        for snippet in transcript
    )

# Chunk long transcript for embedding and search
def chunk_transcript(processed_transcript, chunk_size=1000, chunk_overlap=100):
    """Split the processed transcript text into chunks.

    Args:
        processed_transcript: The text to split.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter``.

    Returns:
        Whatever ``split_text`` returns for the given text.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    ).split_text(processed_transcript)

# Initialize local sentence-transformer embedding model
_EMBEDDING_MODEL = None  # populated on first call to initialize_embedding_model()


def initialize_embedding_model():
    """Return the sentence-transformer embedding model, creating it once.

    The ``HuggingFaceEmbeddings`` wrapper is built on the first call and the
    same object is returned afterwards, so the model is not reloaded for
    every question.

    Returns:
        The ``HuggingFaceEmbeddings`` instance for
        ``sentence-transformers/all-MiniLM-L6-v2``.
    """
    global _EMBEDDING_MODEL
    if _EMBEDDING_MODEL is None:
        model_name = "sentence-transformers/all-MiniLM-L6-v2"
        _EMBEDDING_MODEL = HuggingFaceEmbeddings(model_name=model_name)
    return _EMBEDDING_MODEL

# Initialize Hugging Face text-generation pipeline for summarization and QA
_LLM = None  # populated on first call to initialize_llm()


def initialize_llm():
    """Return the text-generation LLM wrapper, loading the model only once.

    The pipeline is created on the first call and the same
    ``HuggingFacePipeline`` object is returned afterwards, so the checkpoint
    is not reloaded on every button click.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text-generation`` pipeline for
        ``tiiuae/falcon-7b-instruct``.
    """
    global _LLM
    if _LLM is None:
        import torch  # local import: only needed to probe for a GPU

        # Using a smaller open source model for Colab; replace with your preference
        hf_pipe = pipeline(
            "text-generation",
            model="tiiuae/falcon-7b-instruct",  # Replace with smaller model if needed, e.g. "gpt2"
            # Fall back to CPU (-1) instead of failing when no GPU is present.
            device=0 if torch.cuda.is_available() else -1,
            # max_length counts the prompt tokens too, so a long transcript
            # prompt left no budget to generate; cap only the new tokens.
            max_new_tokens=512,
            do_sample=False,
        )
        _LLM = HuggingFacePipeline(pipeline=hf_pipe)
    return _LLM

# Create FAISS vector index for semantic search
def create_faiss_index(chunks, embedding_model):
    """Build a FAISS vector store from text chunks.

    Args:
        chunks: The texts to index.
        embedding_model: The embeddings object handed to ``FAISS.from_texts``.

    Returns:
        The vector store produced by ``FAISS.from_texts``.
    """
    vector_store = FAISS.from_texts(chunks, embedding_model)
    return vector_store

# Prompt template for summary
def create_summary_prompt():
    """Build the prompt template used to summarize a transcript.

    Returns:
        A ``PromptTemplate`` with a single ``transcript`` input variable.
    """
    summary_template = (
        "\n"
        "You are an AI assistant summarizing this YouTube video transcript.\n"
        "Summarize the transcript in one concise paragraph focusing on main points.\n"
        "\n"
        "Transcript:\n"
        "{transcript}\n"
    )
    return PromptTemplate(
        template=summary_template,
        input_variables=["transcript"],
    )

# Prompt template for Q&A
def create_qa_prompt_template():
    """Build the prompt template used to answer a question from context.

    Returns:
        A ``PromptTemplate`` with ``context`` and ``question`` input
        variables.
    """
    qa_template = (
        "\n"
        "You are an expert assistant answering questions based on the following video content:\n"
        "\n"
        "Context:\n"
        "{context}\n"
        "\n"
        "Question:\n"
        "{question}\n"
    )
    return PromptTemplate(
        template=qa_template,
        input_variables=["context", "question"],
    )

# # LangChain chains for summary and Q&A
# def create_summary_chain(llm, prompt):
#     return LLMChain(llm=llm, prompt=prompt, verbose=False)

# def create_qa_chain(llm, prompt):
#     return LLMChain(llm=llm, prompt=prompt, verbose=False)

def create_summary_chain(llm, prompt):
    """Compose the summary chain by piping *prompt* into *llm*.

    Args:
        llm: The right-hand operand of the ``|`` composition.
        prompt: The left-hand operand of the ``|`` composition.

    Returns:
        The result of ``prompt | llm`` (stands in for the former LLMChain).
    """
    summary_chain = prompt | llm
    return summary_chain

def create_qa_chain(llm, prompt):
    """Compose the question-answering chain by piping *prompt* into *llm*.

    Args:
        llm: The right-hand operand of the ``|`` composition.
        prompt: The left-hand operand of the ``|`` composition.

    Returns:
        The result of ``prompt | llm``.
    """
    qa_chain = prompt | llm
    return qa_chain

# Summarize function
def summarize_video(video_url):
    """Summarize the YouTube video at *video_url* (Gradio click handler).

    Args:
        video_url: The URL text entered by the user.

    Returns:
        The stripped summary text produced by the summary chain, or a
        user-facing message when the URL is missing or the transcript cannot
        be retrieved.
    """
    # Reject missing and whitespace-only input before doing any network work.
    if not video_url or not video_url.strip():
        return "Please enter a valid YouTube URL."

    transcript, error = get_transcript(video_url)
    if error:
        return error
    # Covers both None and an empty transcript with a single check.
    if not transcript:
        return "No English transcript available for this video."

    processed_transcript = process(transcript)
    summary_chain = create_summary_chain(initialize_llm(), create_summary_prompt())
    summary = summary_chain.invoke({"transcript": processed_transcript})
    return summary.strip()

# Answer question function
def answer_question(video_url, question):
    """Answer *question* about the YouTube video at *video_url*.

    The transcript is chunked and indexed with FAISS, the five chunks most
    similar to the question are joined into a context, and the QA chain is
    invoked with that context and the question.

    Args:
        video_url: The URL text entered by the user.
        question: The question text entered by the user.

    Returns:
        The stripped answer text produced by the QA chain, or a user-facing
        message when an input is missing or the transcript cannot be
        retrieved.
    """
    # Reject missing and whitespace-only inputs before doing any work.
    if not video_url or not video_url.strip():
        return "Please enter a valid YouTube URL."
    if not question or not question.strip():
        return "Please enter a question."

    transcript, error = get_transcript(video_url)
    if error:
        return error
    # Same guard as summarize_video: never try to index an empty transcript.
    if not transcript:
        return "No English transcript available for this video."

    chunks = chunk_transcript(process(transcript))
    faiss_index = create_faiss_index(chunks, initialize_embedding_model())

    # Retrieve similar chunks from FAISS for context
    relevant_docs = faiss_index.similarity_search(question, k=5)
    context = "\n\n".join(doc.page_content for doc in relevant_docs)

    qa_chain = create_qa_chain(initialize_llm(), create_qa_prompt_template())
    answer = qa_chain.invoke({"context": context, "question": question})
    return answer.strip()


# Gradio user interface
# Build the page: one URL box shared by two actions (summarize / ask), each
# with its own button and output box.
with gr.Blocks() as demo:
    gr.Markdown("# YouTube Video Summarizer & Q&A with Hugging Face on Colab")
    # Video URL; read by both click handlers below.
    url_input = gr.Textbox(label="YouTube Video URL", placeholder="Enter YouTube video URL here...")
    summarize_btn = gr.Button("Summarize Video")
    summary_output = gr.Textbox(label="Video Summary", lines=5)
    question_input = gr.Textbox(label="Ask a question about the video")
    question_btn = gr.Button("Get Answer")
    answer_output = gr.Textbox(label="Answer", lines=5)

    # Wire each button to its handler; the handler's return value (summary,
    # answer, or an error message string) is shown in the output box.
    summarize_btn.click(summarize_video, inputs=url_input, outputs=summary_output)
    question_btn.click(answer_question, inputs=[url_input, question_input], outputs=answer_output)

# share=True serves the app on a public gradio.live URL; debug=True keeps the
# Colab cell running so errors and logs stay visible (see the cell output).
demo.launch(share=True,debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://da89357e8ce28e7736.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
  return HuggingFacePipeline(pipeline=hf_pipe)
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
  return HuggingFaceEmbeddings(model_name=model_name)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://da89357e8ce28e7736.gradio.live


