***Import Libraries***

In [1]:
import gradio as gr
import os
import uuid
import whisper
import yt_dlp
import openai
from langchain.schema import Document
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from langchain.agents import initialize_agent, Tool, AgentType
from langsmith import traceable  # ✅ LangSmith tracing
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from pydub import AudioSegment
from langchain.callbacks import tracing_v2_enabled

  from .autonotebook import tqdm as notebook_tqdm


***Lang Chain Environment Setup***

In [2]:
load_dotenv()

os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT", "YouTube-Assistant-Project")

***Video-to-Text Document Pipeline***

In [3]:

@traceable(name="Download Audio From Youtube")


def download_audio_from_youtube(youtube_url, output_dir="downloads"):
    os.makedirs(output_dir, exist_ok=True)
    output_template = os.path.join(output_dir, f"{uuid.uuid4()}.%(ext)s")



    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': output_template,
        'quiet': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '8',
        }],
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(youtube_url, download=True)
        audio_path = ydl.prepare_filename(info_dict).replace(".webm", ".mp3").replace(".mp4", ".mp3")
    return audio_path


def split_audio(audio_path: str, chunk_length_sec: int = 30):
    audio = AudioSegment.from_file(audio_path)
    chunk_length_ms = chunk_length_sec * 1000
    chunk_paths = []

    for i, start_ms in enumerate(range(0, len(audio), chunk_length_ms)):
        chunk = audio[start_ms : start_ms + chunk_length_ms]
        chunk_path = audio_path.replace(".mp3", f"_chunk{i}.mp3")
        chunk.export(chunk_path, format="mp3")
        chunk_paths.append(chunk_path)

    return chunk_paths

def transcribe_with_whisper(audio_path, model_size="base", chunk_length_sec=30):
    model = whisper.load_model(model_size)
    texts = []

    chunks = split_audio(audio_path, chunk_length_sec=chunk_length_sec)

    for chunk_path in chunks:
        result = model.transcribe(chunk_path)
        texts.append(result["text"])
        os.remove(chunk_path) 

    return "\n\n".join(texts)

from langchain.text_splitter import RecursiveCharacterTextSplitter

def convert_to_documents(texts):
    full_text = "\n\n".join(texts)

    splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=100)
    chunks = splitter.split_text(full_text)

    return [Document(page_content=chunk, metadata={"source": "video"}) for chunk in chunks]
    


***Text Summarization***

In [4]:
from openai import OpenAI
import os

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
@traceable(name="Summarize Text")
def summarize_text(text):
    response = client.chat.completions.create(
        model="gpt-3.5-turbo", 
        messages=[
            {"role": "system", "content": "You are a helpful assistant that summarizes text."},
            {"role": "user", "content": f"Please summarize the following text:\n\n{text}"}
        ],
        temperature=0.5
    )
    return response.choices[0].message.content


***Vectorstore Setup & Management***

In [5]:


load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

import openai
openai.api_key = openai_api_key

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory="chroma_db",   
    collection_name="search-majc"       
)

def add_documents_to_vectorstore(docs: list[str]):
    """
    docs: A list of text (strings) or Document objects
    """
    documents = [
        doc if isinstance(doc, Document) else Document(page_content=doc)
        for doc in docs
    ]
    vectorstore.add_documents(documents)
    vectorstore.persist()  

def clear_vectorstore():
        vectorstore.delete(delete_all=True) 
        vectorstore.persist() 
    

  embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
  vectorstore = Chroma(


***RAG Chatbot Initialization***
Q&A Bot Initialization

In [6]:
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent, Tool, AgentType
import os


@traceable(name="Initialize Chatbot")
def initialize_chatbot():
    llm = ChatOpenAI(
        temperature=0,
        openai_api_key=os.getenv("OPENAI_API_KEY")
    )

    retriever = vectorstore.as_retriever(
        search_kwargs={'k': 8, 'filter': {'source': 'video'}}
    )

    system_prompt = """
You are an intelligent assistant that only answers based on the **transcript of a YouTube video**.

Rules:
- ONLY use information from the transcript provided.
- DO NOT use outside knowledge, personal opinions, or make assumptions.
- If the answer is not found in the transcript, respond clearly with:
  "The information is not available in the video transcript."

Examples:
Question: What is the main topic of the video?
Answer: [Answer based on transcript]

Question: Who is the president of the United States?
Answer: The information is not available in the video transcript.

Be brief, clear, and always stay within the content of the transcript.
"""

    prompt = ChatPromptTemplate.from_messages([
        SystemMessagePromptTemplate.from_template(system_prompt),
        HumanMessagePromptTemplate.from_template("Question: {question}\n\nContext:\n{context}")
    ])

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt}
    )

    tools = [
        Tool(
            name="Question Answering",
            func=qa_chain.run,
            description="Answers questions only based on the transcript of a YouTube video."
        )
    ]

    agent = initialize_agent(
        tools=tools,
        agent_type=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
        llm=llm,
        verbose=True
    )

    return agent


***Chatbot Interaction / Query Execution***

In [7]:

@traceable(name="Chat With Bot")
def chat_with_bot(user_input):
    agent = initialize_chatbot()
    response = agent.run(user_input)
    return response


***Deployment***

In [None]:

import gradio as gr   
chat_history = []

with gr.Blocks(title="YouTube AI Assistant", theme=gr.themes.Soft()) as interface:
    gr.Markdown("""
    <style>
        @import url('https://fonts.googleapis.com/css2?family=Noto+Sans+Arabic:wght@400;700&display=swap');
        html, body, input, textarea, button, .gr-chatbot, .gr-textbox, .gr-button, .gr-accordion {
            font-family: 'Noto Sans Arabic', sans-serif !important;
        }
    </style>
    <div style="text-align: center;">
        <h1 style="color:#3b82f6;">🎥 YouTube AI Assistant</h1>
        <p style="font-size: 16px;">Transcribe, Summarize, and Ask Questions About Any YouTube Video</p>
    </div>
    """)

    with gr.Tabs():
        with gr.TabItem("📼 Transcribe & Summarize"):
            with gr.Column():
                youtube_url = gr.Textbox(label="🔗 YouTube URL", placeholder="Paste YouTube video link here...", lines=1)
                submit_btn = gr.Button("▶️ Process Video")

            with gr.Accordion("📝 Transcribed Text", open=False):
                transcript_output = gr.Textbox(label="Transcript", lines=10, interactive=False)

            with gr.Accordion("🧠 Summary", open=False):
                summary_output = gr.Textbox(label="Summary", lines=5, interactive=False)

            hidden_text = gr.Textbox(visible=False)

            def transcribe_and_summarize(youtube_url):
                audio_path = download_audio_from_youtube(youtube_url)
                transcribed_text = transcribe_with_whisper(audio_path)
                documents = convert_to_documents([transcribed_text])
                add_documents_to_vectorstore(documents)
                summary = summarize_text(transcribed_text)
                return transcribed_text, summary, transcribed_text

            submit_btn.click(
                transcribe_and_summarize,
                inputs=youtube_url,
                outputs=[transcript_output, summary_output, hidden_text]
            )

        with gr.TabItem("🤖 Ask the Chatbot"):
            chatbot_display = gr.Chatbot(label="💬 Chat with the Bot", height=400)
            with gr.Row():
                user_question = gr.Textbox(placeholder="Type your message...", show_label=False, lines=1)
                submit_chat = gr.Button("📤 Send")

            def answer_question_with_chatbot(user_input):
                response = chat_with_bot(user_input)
                chat_history.append((user_input, response))
                return chat_history, ""

            submit_chat.click(
                answer_question_with_chatbot,
                inputs=user_question,
                outputs=[chatbot_display, user_question]
            )
            
            

interface.launch(share=True)



  chatbot_display = gr.Chatbot(label="💬 Chat with the Bot", height=400)


* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://87783c265900da6b2b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




                                                         

  llm = ChatOpenAI(
  agent = initialize_agent(
  response = agent.run(user_input)




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use the Question Answering tool to find out the topic of the video.
Action: Question Answering
Action Input: Transcript of the YouTube video[0m
Observation: [36;1m[1;3mThe video is about exploring the OSI model, its importance in networking, and how it is the blueprint behind emails, video streaming, and website visits. It also mentions testing OSI skills and hints at upcoming topics like IP addressing in the field of IT.[0m
Thought:[32;1m[1;3mI have gathered information about the topic of the video.
Final Answer: The topic of the video is exploring the OSI model and its importance in networking.[0m

[1m> Finished chain.[0m


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use the Question Answering tool to find the answer to this question.
Action: Question Answering
Action Input: "What is a PIPA?"[0m
Observation: [36;1m[1;3mAnswer: A PIPA, or automatic private IP addressing, is what h