In [12]:
from youtube_transcript_api import YouTubeTranscriptApi
from urllib.parse import urlparse, parse_qs
import requests

In [13]:
def get_video_id(youtube_url):
    query = urlparse(youtube_url)
    if query.hostname == 'youtu.be':
        return query.path[1:]
    if query.hostname in ('www.youtube.com', 'youtube.com'):
        if query.path == '/watch':
            return parse_qs(query.query)['v'][0]
    raise ValueError("Invalid YouTube URL")

In [14]:
def get_transcript(video_id):
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    return " ".join([entry['text'] for entry in transcript])

In [15]:
def chunk_transcript(text, max_words=2000):
    words = text.split()
    chunks = []
    for i in range(0, len(words), max_words):
        chunk = " ".join(words[i:i + max_words])
        chunks.append(chunk)
    return chunks

In [16]:
def summarize_with_ollama(transcript, model="llama3.2"):
    url = "http://localhost:11500/api/chat"

    system_prompt = (
        "You are an expert tutor. "
        "Your job is to analyze the following transcript and provide a detailed, "
        "clear, and structured summary with key points, examples, and explanations. "
        "Use Markdown formatting with headings, bullet points, and bold text for clarity. "
        "If possible, break down complex ideas into simple terms."
    )

    user_prompt = f"""
    Please read this transcript and create a detailed study guide:
    
    {transcript}
    """

    payload = {
        "model": model,
        "stream": False,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    }

    response = requests.post(url, json=payload)
    response.raise_for_status()
    result = response.json()

    return result["message"]["content"]


In [17]:
if __name__ == "__main__":
    yt_url = input("Enter YouTube URL: ")
    vid = get_video_id(yt_url)
    print("Video ID:", vid)

    transcript = get_transcript(vid)
    print("Transcript length:", len(transcript))

    chunks = chunk_transcript(transcript, max_words=2000)
    print(f"Total chunks: {len(chunks)}")

    summaries = []
    for i, chunk in enumerate(chunks):
        print(f"Summarizing chunk {i+1}/{len(chunks)}...")
        summary = summarize_with_ollama(chunk)
        summaries.append(summary)

    combined_summary = "\n\n".join(summaries)

    final_study_guide = summarize_with_ollama(
        combined_summary,
        model="llama3.2"
    )

    print("\n=== FINAL STUDY GUIDE ===\n")
    print(final_study_guide)

Video ID: _uQrJ0TkZlc
Transcript length: 265971
Total chunks: 26
Summarizing chunk 1/26...
Summarizing chunk 2/26...
Summarizing chunk 3/26...
Summarizing chunk 4/26...
Summarizing chunk 5/26...
Summarizing chunk 6/26...
Summarizing chunk 7/26...
Summarizing chunk 8/26...
Summarizing chunk 9/26...
Summarizing chunk 10/26...
Summarizing chunk 11/26...
Summarizing chunk 12/26...
Summarizing chunk 13/26...
Summarizing chunk 14/26...
Summarizing chunk 15/26...
Summarizing chunk 16/26...
Summarizing chunk 17/26...
Summarizing chunk 18/26...
Summarizing chunk 19/26...
Summarizing chunk 20/26...
Summarizing chunk 21/26...
Summarizing chunk 22/26...
Summarizing chunk 23/26...
Summarizing chunk 24/26...
Summarizing chunk 25/26...
Summarizing chunk 26/26...

=== FINAL STUDY GUIDE ===

I can provide a comprehensive response based on the provided specification.

**Machine Learning with Decision Trees**

Decision trees are a type of supervised learning algorithm used in machine learning. They work 