In [None]:
import os

import pandas as pd
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

In [None]:
!pip install google-api-python-client pandas



In [None]:
# YouTube Data API configuration.
# The key is read from the YOUTUBE_API_KEY environment variable so it never has
# to be stored in the notebook; when the variable is unset this falls back to
# the empty placeholder.
API_KEY = os.environ.get('YOUTUBE_API_KEY', '')
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

# Module-level API client referenced by the fetch helpers defined in this cell.
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=API_KEY)
import time

def safe_execute(request, retries=5):
    """Execute an API request, waiting and retrying while the quota is exhausted.

    Args:
        request: Any object exposing ``execute()``.
        retries: Maximum number of attempts.

    Returns:
        Whatever ``request.execute()`` returns. ``None`` is returned only when
        ``retries`` is zero or negative, i.e. no attempt was made.

    Raises:
        HttpError: Immediately for any error that is not a quota error, or the
            last quota error once every attempt has failed.
    """
    last_quota_error = None
    for attempt in range(retries):
        try:
            return request.execute()
        except HttpError as e:
            if "quotaExceeded" not in str(e):
                raise
            last_quota_error = e
            # Sleeping after the final attempt would only delay the failure.
            if attempt < retries - 1:
                print("Quota exceeded. Retrying in 5 minutes...")
                time.sleep(300)  # Wait for 5 minutes

    if last_quota_error is not None:
        # Surface the failure instead of silently returning None, which callers
        # would otherwise trip over when they call .get() on the response.
        raise last_quota_error
    return None

def fetch_videos_by_genre_safe(genre, max_results=500):
    """Search for videos matching ``genre``, retrying on quota errors.

    Pages through ``search.list`` results (50 per page) via ``safe_execute``
    until ``max_results`` videos are collected or no further page exists.

    Args:
        genre: Search query string.
        max_results: Upper bound on the number of videos returned.

    Returns:
        A list of at most ``max_results`` dicts with the keys ``video_id``,
        ``title``, ``description``, ``channel_title`` and ``published_at``.
        If a request fails or yields no response, the videos gathered so far
        are returned.
    """
    videos = []
    next_page_token = None

    while len(videos) < max_results:
        try:
            request = youtube.search().list(
                q=genre,
                type='video',
                part='id,snippet',
                maxResults=50,
                pageToken=next_page_token
            )
            response = safe_execute(request)
            if response is None:
                # safe_execute can come back empty-handed; stop paging rather
                # than crash on response.get below.
                print("No response received; returning the videos collected so far.")
                break

            for item in response.get('items', []):
                videos.append({
                    'video_id': item['id']['videoId'],
                    'title': item['snippet']['title'],
                    'description': item['snippet']['description'],
                    'channel_title': item['snippet']['channelTitle'],
                    'published_at': item['snippet']['publishedAt']
                })

            next_page_token = response.get('nextPageToken')
            if not next_page_token:
                break
        except HttpError as e:
            # Best effort: report the error and keep what was already fetched.
            print(f"An error occurred: {e}")
            break

    return videos[:max_results]

def fetch_video_details(video_ids):
    """Fetch snippet, statistics and content details for the given video IDs.

    IDs are requested in batches of 50 through ``videos.list``. Each request
    goes through ``safe_execute`` so quota errors are retried the same way as
    in the search helper of this cell.

    Args:
        video_ids: List of video ID strings.

    Returns:
        A list with one dict per returned video, holding ``video_url``,
        ``title``, ``description``, ``channel_title``, ``tags`` (comma-joined),
        ``category_id``, ``published_at``, ``duration``, ``view_count``,
        ``comment_count`` and ``captions_available``.
    """
    video_data = []
    for start in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part='snippet,statistics,contentDetails',
            id=','.join(video_ids[start:start + 50])
        )
        # Treat a missing response as an empty batch instead of crashing.
        response = safe_execute(request) or {}

        for item in response.get('items', []):
            video_data.append({
                'video_url': f"https://www.youtube.com/watch?v={item['id']}",
                'title': item['snippet']['title'],
                'description': item['snippet']['description'],
                'channel_title': item['snippet']['channelTitle'],
                'tags': ', '.join(item['snippet'].get('tags', [])),
                'category_id': item['snippet']['categoryId'],
                'published_at': item['snippet']['publishedAt'],
                'duration': item['contentDetails']['duration'],
                # Counts may be absent from statistics; default to 0.
                'view_count': int(item['statistics'].get('viewCount', 0)),
                'comment_count': int(item['statistics'].get('commentCount', 0)),
                'captions_available': check_captions_availability(item['id']),
            })

    return video_data

def check_captions_availability(video_id):
    """Report whether the captions listing for a video contains any track.

    Args:
        video_id: ID of the video to look up.

    Returns:
        The string 'true' when ``captions.list`` yields at least one item and
        'false' otherwise, including when the lookup fails.
    """
    try:
        request = youtube.captions().list(part='id', videoId=video_id)
        response = request.execute()
        return 'true' if response.get('items') else 'false'
    except Exception:
        # Best-effort lookup: any API or network failure counts as "no captions".
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate, so a long run can still
        # be interrupted.
        return 'false'

def fetch_video_category(category_id):
    """Resolve a category ID to its title.

    Queries ``videoCategories.list`` for ``category_id`` and returns the title
    of the matching entry, or "Unknown" when the response has no such entry.
    """
    response = youtube.videoCategories().list(
        part='snippet',
        id=category_id
    ).execute()

    titles_by_id = {}
    for entry in response.get('items', []):
        titles_by_id[entry['id']] = entry['snippet']['title']

    if category_id in titles_by_id:
        return titles_by_id[category_id]
    return "Unknown"

def save_to_csv(data, filename):
    """Write ``data`` to ``filename`` as CSV without an index column.

    ``data`` is anything ``pd.DataFrame`` accepts; a confirmation line is
    printed once the file has been written.
    """
    pd.DataFrame(data).to_csv(filename, index=False)
    print(f"Data saved to {filename}")

if __name__ == "__main__":
    genre_input = "comedy"
    max_videos = 500

    # Fetch video metadata. The search helper defined in this cell is
    # fetch_videos_by_genre_safe; calling fetch_videos_by_genre here raises a
    # NameError on a fresh kernel because no such function exists yet.
    video_list = fetch_videos_by_genre_safe(genre_input, max_results=max_videos)
    video_ids = [video['video_id'] for video in video_list]

    # Fetch detailed video data
    detailed_videos = fetch_video_details(video_ids)

    # Add category names
    for video in detailed_videos:
        video['category'] = fetch_video_category(video['category_id'])

    # Save to CSV
    save_to_csv(detailed_videos, f"{genre_input}_top_videos.csv")



Fetching top 500 videos for genre: comedy


HttpError: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?q=comedy&type=video&part=id%2Csnippet&maxResults=50&key=REDACTED&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">

In [None]:
import os
import pandas as pd
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# YouTube API configuration
# Keys are read from the YOUTUBE_API_KEYS environment variable (comma-separated)
# so that no credential is stored in the notebook. Blank entries are ignored.
API_KEYS = [
    key.strip()
    for key in os.environ.get('YOUTUBE_API_KEYS', '').split(',')
    if key.strip()
]
# Position in API_KEYS of the key currently in use.
current_key_index = 0

def get_youtube_client():
    """Build a 'youtube' v3 API client authenticated with the active key.

    The active key is ``API_KEYS[current_key_index]``.
    """
    active_key = API_KEYS[current_key_index]
    client = build('youtube', 'v3', developerKey=active_key)
    return client

# Module-level client used by the fetch helpers in this cell; rotate_api_key()
# rebinds it when the active key changes.
youtube = get_youtube_client()

def rotate_api_key():
    """Advance to the next key in ``API_KEYS``, wrapping around, and rebuild the client."""
    global current_key_index, youtube
    key_count = len(API_KEYS)
    current_key_index = (current_key_index + 1) % key_count
    youtube = get_youtube_client()
    # Keys are reported 1-based in the log line.
    print(f"Switched to API key {current_key_index + 1}")

def remove_expired_key():
    """Remove the active API key from the pool and switch to a remaining key.

    Raises:
        Exception: If removing the key leaves the pool empty.
    """
    global current_key_index, API_KEYS, youtube
    print(f"API key {current_key_index + 1} expired. Removing from the pool.")
    API_KEYS.pop(current_key_index)
    if not API_KEYS:
        raise Exception("No valid API keys available. Please add new keys.")
    # After the pop the same index refers to the following key; wrap around if
    # the removed key was the last one.
    current_key_index %= len(API_KEYS)
    # Rebuild the client, otherwise it would keep using the removed key.
    youtube = get_youtube_client()


def fetch_videos_by_genre(genre, max_results=30):
    """Fetches video metadata by genre.

    Pages through ``search.list`` results. On a quota error the API key is
    rotated and the same page is requested again; once every key in
    ``API_KEYS`` has failed in a row the search stops.

    Args:
        genre: Search query string.
        max_results: Upper bound on the number of videos returned.

    Returns:
        A list of at most ``max_results`` dicts with the keys ``video_id``,
        ``title``, ``description``, ``channel_title`` and ``published_at``.
        May be shorter when results run out or all keys are out of quota.

    Raises:
        HttpError: For any API error that is not a quota error.
    """
    videos = []
    next_page_token = None
    # Quota failures in a row; reaching the pool size means every key was tried.
    consecutive_quota_failures = 0

    print(f"Fetching top {max_results} videos for genre: {genre}")

    while len(videos) < max_results:
        try:
            request = youtube.search().list(
                q=genre,
                type='video',
                part='id,snippet',
                # Request as many results as are still needed, capped at 50,
                # so the total is reached in as few search calls as possible.
                maxResults=min(50, max_results - len(videos)),
                pageToken=next_page_token
            )
            response = request.execute()
        except HttpError as e:
            if "quotaExceeded" not in str(e):
                raise
            consecutive_quota_failures += 1
            if consecutive_quota_failures >= len(API_KEYS):
                # Every key failed back to back; rotating again would loop forever.
                print("Quota exceeded for every API key. Returning the videos collected so far.")
                break
            print("Quota exceeded for the current API key. Rotating key...")
            rotate_api_key()
            continue

        consecutive_quota_failures = 0
        for item in response.get('items', []):
            videos.append({
                'video_id': item['id']['videoId'],
                'title': item['snippet']['title'],
                'description': item['snippet']['description'],
                'channel_title': item['snippet']['channelTitle'],
                'published_at': item['snippet']['publishedAt']
            })

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return videos[:max_results]

def fetch_video_details(video_ids):
    """Fetches detailed information about videos.

    IDs are requested in batches of 50 through ``videos.list``. When a batch
    hits a quota error the key is rotated and the same batch is requested
    again, at most once per key in ``API_KEYS``.

    Args:
        video_ids: List of video ID strings.

    Returns:
        A list with one dict per returned video. If a batch fails with a quota
        error on every key, the rows collected so far are returned.

    Raises:
        HttpError: For any API error that is not a quota error.
    """
    video_data = []
    for i in range(0, len(video_ids), 50):
        batch_ids = video_ids[i:i + 50]
        response = None

        # Retry the same batch after each rotation instead of skipping it.
        for _ in range(max(1, len(API_KEYS))):
            try:
                # Built inside the loop because rotate_api_key() rebinds `youtube`.
                request = youtube.videos().list(
                    part='snippet,statistics,contentDetails',
                    id=','.join(batch_ids)
                )
                response = request.execute()
                break
            except HttpError as e:
                if "quotaExceeded" not in str(e):
                    raise
                print("Quota exceeded for the current API key. Rotating key...")
                rotate_api_key()

        if response is None:
            print("Quota exceeded for every API key. Returning the details collected so far.")
            break

        for item in response.get('items', []):
            video_data.append({
                'video_url': f"https://www.youtube.com/watch?v={item['id']}",
                'title': item['snippet']['title'],
                'description': item['snippet']['description'],
                'channel_title': item['snippet']['channelTitle'],
                'tags': ', '.join(item['snippet'].get('tags', [])),
                'category_id': item['snippet']['categoryId'],
                'published_at': item['snippet']['publishedAt'],
                'duration': item['contentDetails']['duration'],
                # Counts may be absent from statistics; default to 0.
                'view_count': int(item['statistics'].get('viewCount', 0)),
                'comment_count': int(item['statistics'].get('commentCount', 0)),
                'captions_available': check_captions_availability(item['id']),
            })

    return video_data

def check_captions_availability(video_id):
    """Checks if captions are available for a video.

    Returns the string 'true' when ``captions.list`` yields at least one item
    for ``video_id`` and 'false' otherwise, including when the lookup fails.
    """
    try:
        request = youtube.captions().list(part='id', videoId=video_id)
        response = request.execute()
        return 'true' if response.get('items') else 'false'
    except Exception:
        # Deliberately best-effort, but limited to Exception so that
        # KeyboardInterrupt / SystemExit are not swallowed during a long run.
        return 'false'

def fetch_video_category(category_id):
    """Maps category ID to category name.

    On a quota error the API key is rotated and the lookup is repeated, at
    most once per key in ``API_KEYS``.

    Args:
        category_id: Category ID string.

    Returns:
        The category title, or "Unknown" when the ID is not in the response or
        every attempt failed with a quota error.

    Raises:
        HttpError: For any API error that is not a quota error.
    """
    for _ in range(max(1, len(API_KEYS))):
        try:
            # Built inside the loop because rotate_api_key() rebinds `youtube`.
            request = youtube.videoCategories().list(
                part='snippet',
                id=category_id
            )
            response = request.execute()
        except HttpError as e:
            if "quotaExceeded" not in str(e):
                raise
            print("Quota exceeded for the current API key. Rotating key...")
            rotate_api_key()
            # Retry with the new key instead of giving up on this category.
            continue

        categories = {item['id']: item['snippet']['title'] for item in response.get('items', [])}
        return categories.get(category_id, "Unknown")

    return "Unknown"

def save_to_csv(data, filename):
    """Dump ``data`` into a CSV file at ``filename`` and print a confirmation.

    The rows are loaded into a DataFrame first; the index is not written.
    """
    table = pd.DataFrame(data)
    table.to_csv(filename, index=False)
    confirmation = f"Data saved to {filename}"
    print(confirmation)

if __name__ == "__main__":
    genre_input = "comedy"
    max_videos = 500

    # Fetch video metadata
    video_list = fetch_videos_by_genre(genre_input, max_results=max_videos)
    video_ids = [video['video_id'] for video in video_list]

    # Fetch detailed video data
    detailed_videos = fetch_video_details(video_ids)

    # Add category names. Each distinct category ID is resolved once and
    # reused, rather than issuing one API request per video.
    category_names = {}
    for video in detailed_videos:
        category_id = video['category_id']
        if category_id not in category_names:
            name = fetch_video_category(category_id)
            if name == "Unknown":
                # Failed lookups are not remembered, so a later video with the
                # same category gets another attempt.
                video['category'] = name
                continue
            category_names[category_id] = name
        video['category'] = category_names[category_id]

    # Save to CSV
    save_to_csv(detailed_videos, f"{genre_input}_top_videos.csv")


Fetching top 500 videos for genre: comedy




Quota exceeded for the current API key. Rotating key...
Switched to API key 2
Quota exceeded for the current API key. Rotating key...
Switched to API key 3




Quota exceeded for the current API key. Rotating key...
Switched to API key 4
Quota exceeded for the current API key. Rotating key...
Switched to API key 5




Quota exceeded for the current API key. Rotating key...
Switched to API key 1
Quota exceeded for the current API key. Rotating key...
Switched to API key 2




Quota exceeded for the current API key. Rotating key...
Switched to API key 3
Quota exceeded for the current API key. Rotating key...
Switched to API key 4




Quota exceeded for the current API key. Rotating key...
Switched to API key 5
Quota exceeded for the current API key. Rotating key...
Switched to API key 1




Quota exceeded for the current API key. Rotating key...
Switched to API key 2




Quota exceeded for the current API key. Rotating key...
Switched to API key 3
Quota exceeded for the current API key. Rotating key...
Switched to API key 4




Quota exceeded for the current API key. Rotating key...
Switched to API key 5
Quota exceeded for the current API key. Rotating key...
Switched to API key 1




Quota exceeded for the current API key. Rotating key...
Switched to API key 2
Quota exceeded for the current API key. Rotating key...
Switched to API key 3




Quota exceeded for the current API key. Rotating key...
Switched to API key 4
Quota exceeded for the current API key. Rotating key...
Switched to API key 5




Quota exceeded for the current API key. Rotating key...
Switched to API key 1
Quota exceeded for the current API key. Rotating key...
Switched to API key 2




Quota exceeded for the current API key. Rotating key...
Switched to API key 3
Quota exceeded for the current API key. Rotating key...
Switched to API key 4




Quota exceeded for the current API key. Rotating key...
Switched to API key 5
Quota exceeded for the current API key. Rotating key...
Switched to API key 1




Quota exceeded for the current API key. Rotating key...
Switched to API key 2
Quota exceeded for the current API key. Rotating key...
Switched to API key 3




Quota exceeded for the current API key. Rotating key...
Switched to API key 4
Quota exceeded for the current API key. Rotating key...
Switched to API key 5




Quota exceeded for the current API key. Rotating key...
Switched to API key 1
Quota exceeded for the current API key. Rotating key...
Switched to API key 2




Quota exceeded for the current API key. Rotating key...
Switched to API key 3
Quota exceeded for the current API key. Rotating key...
Switched to API key 4




Quota exceeded for the current API key. Rotating key...
Switched to API key 5
Quota exceeded for the current API key. Rotating key...
Switched to API key 1




Quota exceeded for the current API key. Rotating key...
Switched to API key 2
Quota exceeded for the current API key. Rotating key...
Switched to API key 3




Quota exceeded for the current API key. Rotating key...
Switched to API key 4
Quota exceeded for the current API key. Rotating key...
Switched to API key 5




Quota exceeded for the current API key. Rotating key...
Switched to API key 1
Quota exceeded for the current API key. Rotating key...
Switched to API key 2




Quota exceeded for the current API key. Rotating key...
Switched to API key 3
Quota exceeded for the current API key. Rotating key...
Switched to API key 4




Quota exceeded for the current API key. Rotating key...
Switched to API key 5
Quota exceeded for the current API key. Rotating key...
Switched to API key 1




Quota exceeded for the current API key. Rotating key...
Switched to API key 2
Quota exceeded for the current API key. Rotating key...
Switched to API key 3




Quota exceeded for the current API key. Rotating key...
Switched to API key 4
Quota exceeded for the current API key. Rotating key...




Switched to API key 5
Quota exceeded for the current API key. Rotating key...
Switched to API key 1




Quota exceeded for the current API key. Rotating key...
Switched to API key 2
Quota exceeded for the current API key. Rotating key...
Switched to API key 3




Quota exceeded for the current API key. Rotating key...
Switched to API key 4
Quota exceeded for the current API key. Rotating key...
Switched to API key 5




Quota exceeded for the current API key. Rotating key...
Switched to API key 1
Quota exceeded for the current API key. Rotating key...
Switched to API key 2




Quota exceeded for the current API key. Rotating key...
Switched to API key 3
Quota exceeded for the current API key. Rotating key...
Switched to API key 4




Quota exceeded for the current API key. Rotating key...
Switched to API key 5
Data saved to comedy_top_videos.csv


In [None]:
import os
import pandas as pd
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# YouTube API configuration
# Credentials must not be stored in the notebook. Keys are read from the
# YOUTUBE_API_KEYS environment variable as a comma-separated list; blank
# entries are ignored.
API_KEYS = [
    key.strip()
    for key in os.environ.get('YOUTUBE_API_KEYS', '').split(',')
    if key.strip()
]
# Position in API_KEYS of the key currently in use.
current_key_index = 0

def get_youtube_client():
    """Return a 'youtube' v3 API client built with ``API_KEYS[current_key_index]``."""
    selected_key = API_KEYS[current_key_index]
    return build('youtube', 'v3', developerKey=selected_key)

# Module-level client used by the fetch helpers in this cell; rotate_api_key()
# rebinds it when the active key changes.
youtube = get_youtube_client()

def rotate_api_key():
    """Switch to the following key in ``API_KEYS`` (wrapping to the first) and rebuild the client."""
    global current_key_index, youtube
    next_index = (current_key_index + 1) % len(API_KEYS)
    current_key_index = next_index
    youtube = get_youtube_client()
    # The log line numbers keys from 1.
    print(f"Switched to API key {current_key_index + 1}")

def fetch_videos_by_genre(genre, max_results=30):
    """Fetches video metadata by genre.

    Pages through ``search.list`` results. On a quota error the API key is
    rotated and the same page is requested again; once every key in
    ``API_KEYS`` has failed in a row the search stops.

    Args:
        genre: Search query string.
        max_results: Upper bound on the number of videos returned.

    Returns:
        A list of at most ``max_results`` dicts with the keys ``video_id``,
        ``title``, ``description``, ``channel_title`` and ``published_at``.
        May be shorter when results run out or all keys are out of quota.

    Raises:
        HttpError: For any API error that is not a quota error.
    """
    videos = []
    next_page_token = None
    # Quota failures in a row; reaching the pool size means every key was tried.
    consecutive_quota_failures = 0

    print(f"Fetching top {max_results} videos for genre: {genre}")

    while len(videos) < max_results:
        try:
            request = youtube.search().list(
                q=genre,
                type='video',
                part='id,snippet',
                # Request as many results as are still needed, capped at 50,
                # so the total is reached in as few search calls as possible.
                maxResults=min(50, max_results - len(videos)),
                pageToken=next_page_token
            )
            response = request.execute()
        except HttpError as e:
            if "quotaExceeded" not in str(e):
                raise
            consecutive_quota_failures += 1
            if consecutive_quota_failures >= len(API_KEYS):
                # Every key failed back to back; rotating again would loop forever.
                print("Quota exceeded for every API key. Returning the videos collected so far.")
                break
            print("Quota exceeded for the current API key. Rotating key...")
            rotate_api_key()
            continue

        consecutive_quota_failures = 0
        for item in response.get('items', []):
            videos.append({
                'video_id': item['id']['videoId'],
                'title': item['snippet']['title'],
                'description': item['snippet']['description'],
                'channel_title': item['snippet']['channelTitle'],
                'published_at': item['snippet']['publishedAt']
            })

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return videos[:max_results]

def fetch_video_details(video_ids):
    """Fetches detailed information about videos.

    IDs are requested in batches of 50 through ``videos.list``. When a batch
    hits a quota error the key is rotated and the same batch is requested
    again, at most once per key in ``API_KEYS``.

    Args:
        video_ids: List of video ID strings.

    Returns:
        A list with one dict per returned video, including ``location`` (or
        "Unknown" when absent) and ``captions_text``. If a batch fails with a
        quota error on every key, the rows collected so far are returned.

    Raises:
        HttpError: For any API error that is not a quota error.
    """
    video_data = []
    for i in range(0, len(video_ids), 50):
        batch_ids = video_ids[i:i + 50]
        response = None

        # Retry the same batch after each rotation instead of skipping it.
        for _ in range(max(1, len(API_KEYS))):
            try:
                # Built inside the loop because rotate_api_key() rebinds `youtube`.
                request = youtube.videos().list(
                    part='snippet,statistics,contentDetails,recordingDetails',
                    id=','.join(batch_ids)
                )
                response = request.execute()
                break
            except HttpError as e:
                if "quotaExceeded" not in str(e):
                    raise
                print("Quota exceeded for the current API key. Rotating key...")
                rotate_api_key()

        if response is None:
            print("Quota exceeded for every API key. Returning the details collected so far.")
            break

        for item in response.get('items', []):
            video_data.append({
                'video_url': f"https://www.youtube.com/watch?v={item['id']}",
                'title': item['snippet']['title'],
                'description': item['snippet']['description'],
                'channel_title': item['snippet']['channelTitle'],
                'tags': ', '.join(item['snippet'].get('tags', [])),
                'category_id': item['snippet']['categoryId'],
                'published_at': item['snippet']['publishedAt'],
                'duration': item['contentDetails']['duration'],
                # Counts may be absent from statistics; default to 0.
                'view_count': int(item['statistics'].get('viewCount', 0)),
                'comment_count': int(item['statistics'].get('commentCount', 0)),
                'captions_available': check_captions_availability(item['id']),
                'location': item.get('recordingDetails', {}).get('location', "Unknown"),
                'captions_text': fetch_captions_text(item['id']),
            })

    return video_data

def check_captions_availability(video_id):
    """Checks if captions are available for a video.

    Returns the string 'true' when ``captions.list`` yields at least one item
    for ``video_id`` and 'false' otherwise, including when the lookup fails.
    """
    try:
        request = youtube.captions().list(part='id', videoId=video_id)
        response = request.execute()
        return 'true' if response.get('items') else 'false'
    except Exception:
        # Failures are treated as "no captions" on purpose. Exception is caught
        # instead of using a bare except so that interrupting the kernel
        # (KeyboardInterrupt) still stops the run.
        return 'false'

def fetch_captions_text(video_id):
    """Download the first listed caption track of a video as SRT text.

    Returns the UTF-8 decoded track, "No captions" when the listing is empty,
    or "Captions unavailable" when the API raises an HttpError.
    """
    try:
        listing = youtube.captions().list(part='id', videoId=video_id).execute()
        tracks = listing.get('items')
        if not tracks:
            return "No captions"

        # Only the first track in the listing is downloaded.
        first_track_id = listing['items'][0]['id']
        srt_payload = youtube.captions().download(id=first_track_id, tfmt="srt").execute()
        return srt_payload.decode('utf-8')
    except HttpError:
        return "Captions unavailable"

def fetch_video_category(category_id):
    """Maps category ID to category name.

    On a quota error the API key is rotated and the lookup is repeated, at
    most once per key in ``API_KEYS``.

    Args:
        category_id: Category ID string.

    Returns:
        The category title, or "Unknown" when the ID is not in the response or
        every attempt failed with a quota error.

    Raises:
        HttpError: For any API error that is not a quota error.
    """
    for _ in range(max(1, len(API_KEYS))):
        try:
            # Built inside the loop because rotate_api_key() rebinds `youtube`.
            request = youtube.videoCategories().list(
                part='snippet',
                id=category_id
            )
            response = request.execute()
        except HttpError as e:
            if "quotaExceeded" not in str(e):
                raise
            print("Quota exceeded for the current API key. Rotating key...")
            rotate_api_key()
            # Retry with the new key instead of giving up on this category.
            continue

        categories = {item['id']: item['snippet']['title'] for item in response.get('items', [])}
        return categories.get(category_id, "Unknown")

    return "Unknown"

def save_to_csv(data, filename):
    """Persist ``data`` as a CSV file named ``filename`` and announce it.

    The data is converted to a DataFrame and written without its index.
    """
    pd.DataFrame(data).to_csv(
        filename,
        index=False,
    )
    print(f"Data saved to {filename}")

if __name__ == "__main__":
    genre_input = input("Enter the genre of videos you want to fetch: ")
    max_videos = int(input("Enter the maximum number of videos to fetch: "))

    # Fetch video metadata
    video_list = fetch_videos_by_genre(genre_input, max_results=max_videos)
    video_ids = [video['video_id'] for video in video_list]

    # Fetch detailed video data
    detailed_videos = fetch_video_details(video_ids)

    # Add category names. Each distinct category ID is resolved once and
    # reused, rather than issuing one API request per video.
    category_names = {}
    for video in detailed_videos:
        category_id = video['category_id']
        if category_id not in category_names:
            name = fetch_video_category(category_id)
            if name == "Unknown":
                # Failed lookups are not remembered, so a later video with the
                # same category gets another attempt.
                video['category'] = name
                continue
            category_names[category_id] = name
        video['category'] = category_names[category_id]

    # Save to CSV
    save_to_csv(detailed_videos, f"{genre_input}_top_videos.csv")


KeyboardInterrupt: Interrupted by user