# Get Latest Channel Videos with Transcripts
This notebook looks up a YouTube channel by its handle, fetches a configurable number of its latest videos, and saves each video's metadata and transcript to a text file.

In [ ]:
import os
import requests
from youtube_transcript_api import YouTubeTranscriptApi
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from datetime import datetime

# Configuration
# Credentials are read from the environment so that no secret is stored in the
# notebook. NOTE(review): the API key and proxy password that were previously
# hard-coded here have been exposed and should be revoked/rotated.
api_key = os.environ.get('YOUTUBE_API_KEY')
if not api_key:
    raise RuntimeError(
        "YOUTUBE_API_KEY is not set; export a YouTube Data API v3 key before "
        "running this notebook."
    )

# Optional proxy for transcript requests, e.g. "http://user:pass@host:port".
# When YOUTUBE_PROXY_URL is unset, PROXIES is None and no proxy mapping is
# passed on to the transcript fetch.
PROXY_URL = os.environ.get('YOUTUBE_PROXY_URL')
PROXIES = {"https": PROXY_URL, "http": PROXY_URL} if PROXY_URL else None

# Shared YouTube Data API v3 client used by the functions below.
youtube = build('youtube', 'v3', developerKey=api_key)

In [None]:
def get_channel_id(handle):
    """Resolve a YouTube channel handle to its channel ID.

    Queries the YouTube Data API ``channels`` endpoint with ``forHandle``,
    authenticating with the module-level ``api_key``.

    Args:
        handle: Channel handle to look up.

    Returns:
        The channel ID string, or ``None`` when the request fails, the API
        answers with a non-200 status, the body is not valid JSON, or no
        channel matches. A diagnostic message is printed in every failure case.
    """
    try:
        # Passing the query via ``params`` lets requests URL-encode the handle,
        # so characters such as '&', '#' or spaces cannot corrupt the query.
        response = requests.get(
            'https://www.googleapis.com/youtube/v3/channels',
            params={'part': 'id', 'forHandle': handle, 'key': api_key},
            timeout=10,
        )
    except requests.RequestException as e:
        # Only the exception type is printed: the full message can embed the
        # request URL, which contains the API key.
        print(f"Error retrieving channel ID: {type(e).__name__}")
        return None

    if response.status_code != 200:
        print(f"Error retrieving channel ID. Status code: {response.status_code}")
        return None

    try:
        data = response.json()
    except ValueError:
        print("Error retrieving channel ID: response body was not valid JSON")
        return None

    items = data.get('items')
    if not items:
        print(f"No channel found with the handle: {handle}")
        return None

    return items[0]['id']

In [None]:
def get_latest_channel_videos(channel_id, max_videos=10):
    """Collect metadata for up to ``max_videos`` entries of a channel's uploads playlist.

    The channel's "uploads" playlist ID is looked up first; the playlist is
    then paged through (at most 50 items per request) until enough entries
    have been gathered or the API offers no further page.

    Args:
        channel_id: ID of the channel whose uploads should be listed.
        max_videos: Upper bound on the number of videos returned.

    Returns:
        A list of dicts with the keys ``'URL'``, ``'Title'``, ``'VideoId'`` and
        ``'PublishedAt'``, in the order the API returned the playlist items.
        An empty list is returned when the channel is not found, or when an
        ``HttpError`` or ``KeyError`` occurs (a message is printed).
    """
    try:
        # Step 1: find the playlist that holds the channel's uploads.
        channel_lookup = youtube.channels().list(
            id=channel_id,
            part='contentDetails',
        ).execute()

        channel_found = (
            'items' in channel_lookup and len(channel_lookup['items']) > 0
        )
        if not channel_found:
            print(f"No channel found with the ID: {channel_id}")
            return []

        content_details = channel_lookup['items'][0]['contentDetails']
        uploads_playlist_id = content_details['relatedPlaylists']['uploads']
        print(f"Found uploads playlist: {uploads_playlist_id}")

        # Step 2: page through the playlist until enough entries are gathered.
        playlist_entries = []
        page_token = None
        while len(playlist_entries) < max_videos:
            still_needed = max_videos - len(playlist_entries)
            page = youtube.playlistItems().list(
                part='snippet',
                playlistId=uploads_playlist_id,
                maxResults=min(50, still_needed),  # never request more than 50 per page
                pageToken=page_token,
            ).execute()

            playlist_entries.extend(page['items'])
            page_token = page.get('nextPageToken')
            if not page_token:
                # No further page: the playlist is exhausted.
                break

        # Step 3: reduce each playlist item to the fields callers need,
        # never returning more than max_videos entries.
        video_data = []
        for entry in playlist_entries[:max_videos]:
            snippet = entry['snippet']
            video_id = snippet['resourceId']['videoId']
            video_data.append({
                'URL': f"https://www.youtube.com/watch?v={video_id}",
                'Title': snippet['title'],
                'VideoId': video_id,
                'PublishedAt': snippet['publishedAt'],
            })

        print(f"Found {len(video_data)} videos (requested: {max_videos})")
        return video_data

    except HttpError as err:
        print(f"An HTTP error occurred: {err}")
        return []
    except KeyError as err:
        print(f"Unexpected response structure: {err}")
        return []

In [None]:
def get_transcript(video_id):
    """Fetch a video's transcript and render it as timestamped lines.

    The transcript is requested through ``YouTubeTranscriptApi`` with the
    module-level ``PROXIES`` mapping. Each segment becomes one line of the
    form ``[<start>] <text>``.

    Args:
        video_id: ID of the video whose transcript is wanted.

    Returns:
        The formatted transcript as a single string, or ``None`` if any
        exception is raised while fetching or formatting (the error is printed).
    """
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id, proxies=PROXIES)
        return ''.join(
            f"[{segment['start']}] {segment['text']}\n" for segment in segments
        )
    except Exception as err:
        # Best effort: a missing transcript must not abort the whole run.
        print(f'Error fetching transcript for video {video_id}: {err}')
        return None

In [None]:
def process_channel_videos(channel_handle, max_videos=10, output_file=None):
    """Write the latest videos of a channel, with transcripts, to a text file.

    Resolves the handle to a channel ID, lists the channel's latest videos,
    then writes one section per video (number, URL, title, publish date and
    transcript) to ``output_file``. Progress is reported with ``print``.

    Args:
        channel_handle: Channel handle to process.
        max_videos: Number of latest videos to request.
        output_file: Path of the report file. When falsy, a name of the form
            ``<handle>_latest_<max_videos>_videos_<timestamp>.txt`` is generated.

    Returns:
        The path of the written file, or ``None`` when the channel ID could
        not be resolved or no videos were found.
    """
    print(f"Processing latest {max_videos} videos from @{channel_handle}")

    # Resolve the handle; nothing can be done without a channel ID.
    channel_id = get_channel_id(channel_handle)
    if not channel_id:
        print(f"Failed to retrieve channel ID for handle: {channel_handle}")
        return None
    print(f"Channel ID: {channel_id}")

    videos = get_latest_channel_videos(channel_id, max_videos)
    if not videos:
        print("No videos found")
        return None

    # Fall back to a timestamped filename when the caller supplied none.
    if not output_file:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_file = f'{channel_handle}_latest_{max_videos}_videos_{stamp}.txt'

    total = len(videos)
    print(f"\nProcessing {total} videos...")

    section_rule = "=" * 80
    metadata_fields = (("URL", "URL"), ("Title", "Title"), ("Published", "PublishedAt"))
    successful_transcripts = 0

    with open(output_file, 'w', encoding='utf-8') as report:
        # Report header.
        report.write(f"Latest {max_videos} videos from @{channel_handle}\n")
        report.write(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        report.write(section_rule + "\n\n")

        for position, video in enumerate(videos, start=1):
            print(f"Processing video {position}/{total}: {video['Title'][:50]}...")

            # Metadata is written before the transcript is fetched, so the
            # section header is already in the file when the fetch starts.
            report.write(f"Video #{position}\n")
            for label, key in metadata_fields:
                report.write(f"{label}: {video[key]}\n")
            report.write("-" * 40 + "\n")

            transcript = get_transcript(video['VideoId'])
            if not transcript:
                report.write("[No transcript available]\n")
            else:
                report.write(transcript)
                successful_transcripts += 1

            report.write('\n' + section_rule + '\n\n')

    print("\nProcessing complete!")
    print(f"Successfully processed {successful_transcripts}/{total} transcripts")
    print(f"Results saved to: {output_file}")

    return output_file

## Usage Examples

Configure the parameters below and run the cell to process videos:

In [None]:
# Run parameters - edit these before executing the cell
CHANNEL_HANDLE = 'TheRandallCarlson'  # handle of the channel, given without the leading @
MAX_VIDEOS = 5  # how many of the latest videos to process
OUTPUT_FILE = None  # report filename; None lets the function generate a timestamped name

# Start processing; result_file receives whatever process_channel_videos returns
result_file = process_channel_videos(
    channel_handle=CHANNEL_HANDLE,
    max_videos=MAX_VIDEOS,
    output_file=OUTPUT_FILE,
)

## Alternative: Process Different Channels

To process a different channel, uncomment one of the example calls below and run its cell:

In [None]:
# Example: Get latest 3 videos from a different channel
# process_channel_videos('mkbhd', 3)

In [None]:
# Example: Get latest 10 videos with custom output file
# process_channel_videos('veritasium', 10, 'veritasium_latest_10.txt')