In [2]:
MRBEAST_ID = "UCJ0uqCI0Vqr2Rrt1HseGirg"
ANOTHERROOF_ID = "UCHEnZhUKjZSLYs3jJ0raKZA"

In [3]:
from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, VideoUnavailable
from dotenv import load_dotenv
import os
import random

def fetch_video_ids(channel_id: str) -> list[str]:
    """
    Fetches all video IDs from a YouTube channel.

    Args:
        channel_id (str): The YouTube channel ID.

    Returns:
        list: A list of video IDs from the channel.
    """
    load_dotenv()
    api_key = os.getenv("API_KEY")
    if not api_key:
        raise ValueError("API key not found. Ensure it is set in the .env file.")

    video_ids = []
    next_page_token = None

    with build("youtube", "v3", developerKey=api_key) as youtube:
        while True:
            request = youtube.search().list(
                part="id",
                channelId=channel_id,
                maxResults=50,
                pageToken=next_page_token,
                type="video"
            )
            response = request.execute()
            video_ids.extend(item["id"]["videoId"] for item in response.get("items", []))
            next_page_token = response.get("nextPageToken")

            if not next_page_token:
                break

    print('(fetch_video_ids) Video ids retrieved:', len(video_ids))
    return video_ids

def fetch_random_video_transcripts(channel_id: str, sample_size: int=5) -> list:
    """
    Retrieves transcripts from a random sample of videos from a YouTube channel.

    Args:
        channel_id (str): The YouTube channel ID.
        sample_size (int): Number of random videos to retrieve transcripts from.

    Returns:
        list: A list of transcripts (or a message if transcripts are not available).

    Example Return Value:
        [
            [  # Transcript of video 1
                {"text": "Hello world!", "start": 0.0, "duration": 1.5},
                {"text": "Welcome to the channel.", "start": 1.5, "duration": 2.0},
                ...
            ],
            [  # Transcript of video 2
                {"text": "This is another video.", "start": 0.0, "duration": 1.2},
                {"text": "Stay tuned for more content.", "start": 1.2, "duration": 2.3},
                ...
            ],
            "Transcript not available.",  # Video 3 has no transcript
            ...
        ]
    """
    video_ids = fetch_video_ids(channel_id)
    if not video_ids:
        raise ValueError("No videos found for the given channel ID.")

    sample_video_ids = random.sample(video_ids, min(sample_size, len(video_ids)))
    transcripts = []

    for video_id in sample_video_ids:
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            transcripts.append(transcript)  # Add the transcript directly
        except (TranscriptsDisabled, VideoUnavailable):
            transcripts.append("Transcript not available.")

    print('(fetch_random_video_transcripts) Transcripts retrieved:', len(transcripts))
    return transcripts

fetch_random_video_transcripts(ANOTHERROOF_ID)

HttpError: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?part=id&channelId=UCJ0uqCI0Vqr2Rrt1HseGirg&maxResults=50&pageToken=CMgBEAA&type=video&key=AIzaSyBIt9w5pwvHgn-XFP32z8Un3K0hvj-Ut74&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">

In [1]:
def fetch_video_ids_with_dates(channel_id: str) -> list[dict]:
    """
    Fetches all video IDs and their publication dates from a YouTube channel.

    Args:
        channel_id (str): The YouTube channel ID.

    Returns:
        list: A list of dictionaries with video IDs and their publication years.
    """
    load_dotenv()
    api_key = os.getenv("API_KEY")
    if not api_key:
        raise ValueError("API key not found. Ensure it is set in the .env file.")

    video_data = []
    next_page_token = None

    with build("youtube", "v3", developerKey=api_key) as youtube:
        while True:
            request = youtube.search().list(
                part="id,snippet",
                channelId=channel_id,
                maxResults=50,
                pageToken=next_page_token,
                type="video"
            )
            response = request.execute()
            for item in response.get("items", []):
                video_id = item["id"]["videoId"]
                published_at = item["snippet"]["publishedAt"]
                publication_year = published_at[:4]  # Extract the year from the timestamp
                video_data.append({"video_id": video_id, "year": publication_year})
            next_page_token = response.get("nextPageToken")

            if not next_page_token:
                break

    print('(fetch_video_ids_with_dates) Video data retrieved:', len(video_data))
    return video_data

def fetch_random_video_transcripts_with_years(channel_id, sample_size=5):
    """
    Retrieves transcripts and years from a random sample of videos from a YouTube channel.

    Args:
        channel_id (str): The YouTube channel ID.
        sample_size (int): Number of random videos to retrieve transcripts from.

    Returns:
        list: A list of dictionaries containing transcripts and years.

    Example Return Value:
        [
            {
                "year": "2023",
                "transcript": [
                    {"text": "Hello world!", "start": 0.0, "duration": 1.5},
                    {"text": "Welcome to the channel.", "start": 1.5, "duration": 2.0},
                    ...
                ]
            },
            {
                "year": "2022",
                "transcript": "Transcript not available."
            },
            ...
        ]
    """
    video_data = fetch_video_ids_with_dates(channel_id)
    if not video_data:
        raise ValueError("No videos found for the given channel ID.")

    sample_video_data = random.sample(video_data, min(sample_size, len(video_data)))
    transcripts = []

    for video in sample_video_data:
        video_id = video["video_id"]
        year = video["year"]
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            transcripts.append({"year": year, "transcript": transcript})
        except (TranscriptsDisabled, VideoUnavailable):
            transcripts.append({"year": year, "transcript": "Transcript not available."})

    print('(fetch_random_video_transcripts_with_years) Transcripts retrieved:', len(transcripts))
    return transcripts

fetch_random_video_transcripts_with_years(ANOTHERROOF_ID)

NameError: name 'MRBEAST_ID' is not defined