# 🎬 YouTube Video Transcription - One-Click Setup

**Super simple - just 3 steps:**

1. **Run the cell below** ⬇️ (takes 2-3 minutes to prepare)
2. **Paste your YouTube URL** when prompted
3. **That's it!** Transcript auto-downloads when done

Works with any YouTube video URL (even long ones!)

💡 **Tip:** Go to Runtime → Change runtime type → GPU (optional, makes it faster)

In [None]:
#!/usr/bin/env python3
"""
ONE-STEP YOUTUBE TRANSCRIPTION
Setup + Download + Transcribe + Save (automatic)
"""

print("\n" + "="*70)
print("🎬  INSTALLING & SETTING UP YOUTUBE TRANSCRIPTION (ONE TIME)")
print("="*70 + "\n")

# Step 1: Install dependencies
print("📦 Installing dependencies...")
import shutil
import subprocess
import sys

# Run pip through the interpreter that executes this cell so the packages are
# installed into the same environment the imports below will read from.
subprocess.run(
    [sys.executable, '-m', 'pip', 'install', '-q', 'openai-whisper', 'yt-dlp'],
    check=True,
)

# ffmpeg is required by both yt-dlp (audio extraction) and Whisper (decoding).
# Skip the system install when a binary is already on PATH, so re-running the
# cell is fast and does not fail on machines without apt-get/root access.
if shutil.which('ffmpeg') is None:
    subprocess.run(['apt-get', '-qq', 'install', '-y', 'ffmpeg'], check=True)
print("✓ Dependencies installed\n")

# Step 2: Import libraries
# (whisper and yt-dlp only become importable after the install step above.)
print("📚 Importing libraries...")
import whisper
import os
from pathlib import Path
from google.colab import files
import torch
import re
print("✓ Libraries ready\n")

# Step 3: Load model
print("🤖 Loading Whisper Large model...")
print("   (This takes ~1-2 minutes first time)\n")
# Tell the user up front when no GPU runtime is attached, since the large
# model is expected to run far slower on CPU than the estimates printed later.
if not torch.cuda.is_available():
    print("   Note: no GPU detected - transcription will run on CPU and be slower.")
    print("   (Runtime > Change runtime type > GPU speeds this up.)\n")
model = whisper.load_model("large")
print("✓ Model loaded\n")

# Steps 4-7: prompt for a URL, download its audio, transcribe it, save the
# transcript and hand it to the browser. The small helpers below hold the
# pure logic; the interactive flow follows under the ``__main__`` guard.


def format_timestamp(seconds):
    """Format a duration in seconds as ``HH:MM:SS`` (fractional part dropped)."""
    hours, remainder = divmod(int(seconds), 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"


def extract_video_id(url):
    """Return the video id found in a YouTube URL, or ``'youtube'`` if none.

    Recognizes ``?v=``/``&v=``, ``youtu.be/``, ``/shorts/``, ``/embed/`` and
    ``/live/`` forms. Only id characters (letters, digits, ``_`` and ``-``)
    are captured, so trailing parts such as ``?t=30`` or ``#frag`` never end
    up in the transcript filename.
    """
    found = re.search(
        r'(?:[?&]v=|youtu\.be/|/(?:shorts|embed|live)/)([A-Za-z0-9_-]+)', url
    )
    return found.group(1) if found else 'youtube'


def build_transcript_text(url, result):
    """Render the transcript file contents.

    Args:
        url: The URL the user entered; written into the header.
        result: Mapping with ``'language'``, ``'text'`` and ``'segments'``
            keys, where each segment has ``'start'``, ``'end'`` (seconds)
            and ``'text'``.

    Returns:
        The header, the full text, then one ``[start → end] text`` line per
        segment, as a single string.
    """
    rule = "=" * 70
    parts = [
        "YouTube Transcript\n",
        f"URL: {url}\n",
        f"Language: {result['language']}\n",
        rule + "\n\n",
        result['text'].strip(),
        "\n\n" + rule + "\n",
        "Detailed segments:\n\n",
    ]
    for segment in result['segments']:
        start = format_timestamp(segment['start'])
        end = format_timestamp(segment['end'])
        parts.append(f"[{start} → {end}] {segment['text'].strip()}\n")
    return "".join(parts)


def download_audio(url, audio_path):
    """Download the audio track of *url* with yt-dlp and convert it to MP3.

    Raises:
        subprocess.CalledProcessError: if yt-dlp exits with a non-zero
            status; the captured stderr is available on the exception.
    """
    subprocess.run([
        'yt-dlp',
        '-x',  # Extract audio
        '--audio-format', 'mp3',
        '--audio-quality', '0',  # Best quality
        '-o', audio_path.replace('.mp3', '.%(ext)s'),
        '--no-playlist',  # Only download single video
        '--',  # End of options: input starting with "-" is still treated as a URL
        url
    ], check=True, capture_output=True, text=True)


# In a notebook cell (or when run as a script) __name__ is "__main__", so the
# flow below executes exactly as before; the guard only keeps the helpers
# above importable without triggering the interactive prompt.
if __name__ == "__main__":
    # Step 4: Get YouTube URL
    print("="*70)
    print("🔗 ENTER YOUTUBE URL")
    print("="*70)
    youtube_url = input("Paste YouTube URL here: ").strip()

    audio_file = "youtube_audio.mp3"
    succeeded = False  # Only announce completion if every step really ran

    if not youtube_url:
        print("⚠️  No URL provided. Please try again.")
    else:
        print(f"\n✓ URL received: {youtube_url}\n")

        # Step 5: Download audio from YouTube
        print("="*70)
        print("📥 DOWNLOADING AUDIO FROM YOUTUBE")
        print("="*70 + "\n")

        try:
            download_audio(youtube_url, audio_file)
            print(f"✓ Audio downloaded: {audio_file}\n")

            # Step 6: Transcribe
            print("="*70)
            print("🎙️  TRANSCRIBING (wait 2-5 minutes depending on length)")
            print("="*70 + "\n")

            print(f"📝 Processing: {audio_file}")
            print("-" * 70)

            # Transcribe (auto-detect language)
            result = model.transcribe(
                audio_file,
                task='transcribe',
                verbose=False
            )

            detected_language = result['language']
            print(f"✓ Detected language: {detected_language}\n")

            # Create safe filename from URL
            video_id = extract_video_id(youtube_url)
            transcript_path = f'youtube_{video_id}_transcript.txt'

            with open(transcript_path, 'w', encoding='utf-8') as f:
                f.write(build_transcript_text(youtube_url, result))

            print(f"✓ Transcript created: {transcript_path}\n")

            # Show preview
            print("📋 Preview:")
            print("-" * 70)
            print(result['text'][:300])
            if len(result['text']) > 300:
                print("...(more)")
            print("-" * 70)

            # Step 7: Auto-download
            print("\n📥 Downloading transcript...")
            files.download(transcript_path)
            print(f"✓ Downloaded: {transcript_path}\n")
            succeeded = True

        except subprocess.CalledProcessError as e:
            print(f"❌ Error downloading video: {e}")
            # yt-dlp's output was captured, so surface its own explanation.
            if e.stderr:
                print(e.stderr.strip())
            print("Make sure the URL is valid and the video is accessible.")
        except Exception as e:
            print(f"❌ Error: {e}")
        finally:
            # Cleanup runs on failure too, so a stale audio file is never
            # left behind for the next run.
            if os.path.exists(audio_file):
                os.remove(audio_file)
                print("✓ Cleaned up temporary audio file\n")

    # Do not claim a transcript was downloaded when a step failed or no URL
    # was given; the messages above already explain what happened.
    if succeeded:
        print("\n" + "="*70)
        print("🎉 ALL DONE!")
        print("="*70)
        print("Your transcript is downloaded to ~/Downloads/\n")
        print("Next step: Run this in terminal to organize:")
        print("   python3 scripts/trigger_colab.py --organize")
        print("="*70 + "\n")

## ✨ That's it!

Your transcript is ready and automatically downloaded.

**Back in terminal, run:**
```
python3 scripts/trigger_colab.py --organize
```

This moves your transcript to the `project/transcripts/` folder. Done! 🎉