In [1]:
!pip uninstall -y youtube-transcript-api
!pip install youtube-transcript-api==0.6.1
!pip install langchain langchain-openai langgraph

Found existing installation: youtube-transcript-api 0.6.1
Uninstalling youtube-transcript-api-0.6.1:
  Successfully uninstalled youtube-transcript-api-0.6.1
Collecting youtube-transcript-api==0.6.1
  Using cached youtube_transcript_api-0.6.1-py3-none-any.whl.metadata (14 kB)
Using cached youtube_transcript_api-0.6.1-py3-none-any.whl (24 kB)
Installing collected packages: youtube-transcript-api
Successfully installed youtube-transcript-api-0.6.1


In [2]:
from google.colab import userdata
import os

# Read the OpenAI key from the Colab secret store and publish it through the
# process environment (presumably where the OpenAI client looks for it), so the
# key never has to be written into the notebook itself.
os.environ.update({"OPENAI_API_KEY": userdata.get('OPENAI_API_KEY')})


In [6]:
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.tools import tool
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent  # Keep this import
import re
import warnings
# Silence DeprecationWarning messages from this point on so they do not
# clutter the output of later cells.
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [7]:
@tool
def get_youtube_transcript(video_id: str) -> str:
    """Fetch the transcript of a YouTube video by video ID."""
    # NOTE(review): the docstring above is kept verbatim on purpose; presumably
    # the @tool decorator exposes it to the model as the tool description.
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        # Flatten the caption entries into one space-separated string.
        transcript_text = " ".join(segment['text'] for segment in segments)
    except Exception as exc:
        # Best-effort: report any failure as text instead of raising, so the
        # caller always receives a string back.
        return f"Error fetching transcript: {exc!s}"
    return transcript_text

In [8]:
def extract_video_id(url: str) -> str:
    """Extract the video ID from a YouTube URL.

    Recognised forms (any subdomain, with or without a scheme):

    * ``youtube.com/watch?...v=<id>`` - ``v`` may appear anywhere in the
      query string, not only as the first parameter
    * ``youtu.be/<id>``
    * ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``
      (also on ``youtube-nocookie.com``)

    The captured ID stops at the first ``&``, ``?``, ``#``, ``/`` or
    whitespace character, so trailing query parameters, fragments, slashes
    and stray spaces never leak into the result.

    Args:
        url: A YouTube URL, or a bare video ID.

    Returns:
        The extracted video ID. If no pattern matches, the input is assumed
        to already be an ID and is returned with surrounding whitespace
        removed.
    """
    candidate = url.strip()  # pasted/typed input often carries stray whitespace

    # Characters that can never be part of the ID segment of a URL.
    id_group = r'([^&?#/\s]+)'
    patterns = [
        # Watch page: lazily skip over any parameters that precede `v=`.
        # The skipped prefix must end in `&`, so `xv=...` is not mistaken for `v=`.
        r'youtube\.com/watch\?(?:[^#\s]*?&)?v=' + id_group,
        # Short share links.
        r'youtu\.be/' + id_group,
        # Path-style URLs (embeds, shorts, live streams, legacy /v/).
        r'youtube(?:-nocookie)?\.com/(?:embed|shorts|live|v)/' + id_group,
    ]
    for pattern in patterns:
        match = re.search(pattern, candidate)
        if match:
            return match.group(1)
    return candidate  # If no pattern matches, assume it's already an ID

In [9]:
# Build the chat model, then wrap it in a prebuilt ReAct-style agent whose only
# tool is the transcript fetcher defined above.
llm = ChatOpenAI(model="gpt-4o-mini")
agent = create_react_agent(model=llm, tools=[get_youtube_transcript])

In [10]:
# Enhanced prompt for all educational components
def analyze_video(video_url: str):
    """Ask the agent for a summary, key concepts and a quiz for one video.

    Args:
        video_url: A YouTube URL (or bare video ID); it is passed through
            ``extract_video_id`` before being placed in the prompt.

    Returns:
        The ``content`` of the last message in the agent's result.
    """
    vid = extract_video_id(video_url)

    # The prompt text (including its indentation) is sent to the model as-is.
    request_text = f"""Get the transcript for video {vid} and provide:

    1. **Summary**: A concise 2-3 paragraph summary of the video content
    2. **Key Concepts**: List 5-7 main concepts or topics covered
    3. **Quiz Questions**: Generate 5 multiple-choice questions with 4 options each and indicate the correct answer

    Format your response clearly with headers for each section."""

    conversation = {"messages": [("user", request_text)]}
    final_state = agent.invoke(conversation)

    # The last message in the returned list is taken as the agent's answer.
    return final_state['messages'][-1].content

In [12]:
# Interactive smoke test: prompt for a URL, run the analysis, and print the
# result under a banner.
video_url = input("Enter YouTube URL: ")
analysis = analyze_video(video_url)
print(
    "\n" + "="*50,
    "EDUCATIONAL VIDEO ANALYSIS",
    "="*50 + "\n",
    analysis,
    sep="\n",
)

Enter YouTube URL: https://www.youtube.com/watch?v=uIojjqSm0m4

EDUCATIONAL VIDEO ANALYSIS

It appears that I am currently unable to retrieve the transcript for the video with the ID "uIojjqSm0m4". This may be due to the video not having a transcript available or an issue with the request.

If you would like, you can provide me with key details or topics from the video, and I can help you create a summary, key concepts, and quiz questions based on that information. Alternatively, you could specify another video ID if you have one in mind.
