# YouTube Video Summarizer

In [1]:
# imports

import os
from dotenv import load_dotenv
from IPython.display import Markdown, display
from openai import OpenAI

In [2]:
!pip install youtube_transcript_api




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: C:\Users\rathi\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip


In [3]:
from youtube_transcript_api import YouTubeTranscriptApi

In [4]:
# Load environment variables from a file called .env
# (override=True: values from .env take precedence over ones already set)

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Nothing else in this notebook inspects api_key, so warn here if it is missing
# instead of letting the problem surface later as an opaque client error.
if not api_key:
    print("OPENAI_API_KEY was not found - add it to your .env file before running the cells below")

In [5]:
import re
from youtube_transcript_api import YouTubeTranscriptApi

class YouTubeWebLink:
    """Summarize a YouTube video from its transcript with an OpenAI chat model.

    Constructing an instance extracts the video id from ``url``, creates an
    OpenAI client and installs the default system prompt. Call
    ``display_summary()`` to fetch the transcript and render the summary.
    """

    # Chat model used by get_summary_from_transcript(); override on a subclass
    # or instance to summarize with a different model.
    MODEL = "gpt-4o-mini"

    # An 11-character video id preceded by one of the markers used in watch
    # (``v=``), share (``youtu.be/``), Shorts, embed and live URLs.
    _VIDEO_ID_RE = re.compile(r"(?:v=|be/|shorts/|embed/|live/)([a-zA-Z0-9_-]{11})")

    def __init__(self, url):
        """Store ``url``, extract its video id and prepare client and prompt.

        Raises:
            ValueError: if no video id can be extracted from ``url``.
        """
        self.url = url
        self.video_id = self.get_video_id(url)
        self.set_openai_client()
        self.set_system_prompt()

    def get_video_id(self, url):
        """Extract the 11-character YouTube video id from ``url``.

        Recognizes ``watch?v=<id>``, ``youtu.be/<id>``, ``shorts/<id>``,
        ``embed/<id>`` and ``live/<id>`` forms.

        Returns:
            The video id as a string.

        Raises:
            ValueError: if ``url`` is not a string or holds no recognizable id.
        """
        # A non-string (e.g. None) would make ``re`` raise TypeError; report it
        # the same way as any other unusable URL.
        if not isinstance(url, str):
            raise ValueError("Probably not a YouTube URL")
        match = self._VIDEO_ID_RE.search(url)
        if match is None:
            raise ValueError("Probably not a YouTube URL")
        return match.group(1)

    def set_openai_client(self):
        """Create the OpenAI client and store it on ``self.openai``."""
        self.openai = OpenAI()

    def set_system_prompt(self, system_prompt=None):
        """Set the system prompt sent with every summarization request.

        Args:
            system_prompt: Custom prompt text. When ``None`` (the default) the
                built-in summarizer prompt below is used.
        """
        self.system_prompt = """
        You are a skilled explainer and storyteller who specializes in summarizing YouTube video transcripts in a way that's both engaging and informative. 
        Your task is to:
        - Capture key points and main ideas of the video
        - Structure your summary with in clear sections
        - Include important details, facts, and figures mentioned
        - Never end your summary with a "Conclusion" section
        - Keep the summary short and easy to understand
        - Always format your response in markdown for better readability
        """ if system_prompt is None else system_prompt

    def get_transcript(self):
        """Fetch the transcript of the video identified by ``self.video_id``.

        Returns:
            The text of all transcript items joined with single spaces, or
            ``None`` if fetching failed (the error is printed, not raised).
        """
        try:
            print('Fetching video transcript...')
            transcript = YouTubeTranscriptApi().fetch(self.video_id)
            # Transcript items are objects; their text is read via the
            # ``.text`` attribute rather than a dictionary key.
            return " ".join(item.text for item in transcript)
        except Exception as e:
            # Best effort: report the problem and let the caller handle None.
            print(f"Error fetching transcript: {e}")
            return None

    def get_summary_from_transcript(self, transcript):
        """Ask the chat model to summarize ``transcript``.

        Args:
            transcript: Transcript text to summarize.

        Returns:
            The content of the first returned choice, or ``None`` if the
            request failed (the error is printed, not raised).
        """
        try:
            print('Summarizing video...')
            response = self.openai.chat.completions.create(
                model=self.MODEL,
                messages=[
                    {"role": "system", "content": self.system_prompt},
                    {"role": "user", "content": f"Summarize the following YouTube video transcript:\n\n{transcript}"}
                ]
            )
            return response.choices[0].message.content
        except Exception as e:
            # Best effort: report the problem and let the caller handle None.
            print(f"Error summarizing text: {e}")
            return None

    def display_summary(self):
        """Fetch the transcript, summarize it and render the result as Markdown.

        Prints a short failure message instead when either step yields nothing.
        """
        transcript = self.get_transcript()
        if not transcript:
            print("Failed to fetch transcript")
            return

        summary = self.get_summary_from_transcript(transcript)
        if not summary:
            print("Failed to generate summary")
            return

        display(Markdown(summary))

In [6]:
# two different YouTube videos, both given as standard watch?v=<id> links
test_url_1 = "https://www.youtube.com/watch?v=pu3-PeBG0YU"
test_url_2 = "https://www.youtube.com/watch?v=EDb37y_MhRw"

In [7]:
# Build one summarizer per test URL and show the video id extracted from each
video1, video2 = [YouTubeWebLink(link) for link in (test_url_1, test_url_2)]
(video1.video_id, video2.video_id)

('pu3-PeBG0YU', 'EDb37y_MhRw')

In [8]:
# Fetch the transcript of the first video and render its summary as Markdown
video1.display_summary()

Fetching video transcript...
Summarizing video...


# Summary of Generative AI Fine-Tuning

## Introduction to Fine-Tuning Generative AI
The video discusses how to specialize a large language model (LLM) for specific use cases without needing advanced technical skills. It emphasizes that while LLMs are effective for general queries, they excel only when trained on domain-specific data.

## Steps to Fine-Tuning an AI Model
The process of fine-tuning involves three main steps:

1. **Data Curation**
   - Collect relevant data that reflects the knowledge and skills you want the model to acquire.
   - The video demonstrates the use of the open-source project **InstructLab**, making it accessible for anyone to contribute.

2. **Synthetic Data Generation**
   - Fine-tuning typically requires large datasets, so the presenter explains how to use an LLM running locally to create synthetic data from curated examples.
   - For instance, a question about Oscar nominations is posed to demonstrate the model's initial inaccuracies.

3. **Parameter Efficient Fine-Tuning**
   - The final step involves integrating the new knowledge into the model with minimal updates to its parameters, allowing the process to be performed on a consumer-grade laptop.

## Practical Application of InstructLab
- The video showcases how to set up InstructLab and prepare a working directory.
- It illustrates the creation of a hierarchy for organizing training data and generating additional examples based on initial inputs.

## Results of Fine-Tuning
- After fine-tuning, the model is tested with the example question about Oscar nominations, providing the correct answer, “Oppenheimer.”
- The process demonstrates the transformation from a general model to a specialized one capable of producing accurate domain-specific responses.

## Community and Future Potential
- The presenter highlights the vision of open-source AI, advocating for community contributions and collaboration on specialized models.
- Use cases are provided, including applications in insurance claims management and legal contract processing.

## Call to Action
To conclude, the video encourages viewers to explore the possibilities of creating specialized AI models and invites interaction through comments regarding users' interests in this technology. 

The presentation offers an empowering message that anyone can fine-tune an LLM using accessible tools and contribute to the growing AI landscape, making it a valuable resource for professionals across various fields.