In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
from googleapiclient.errors import HttpError
from googleapiclient.discovery import build
from dotenv import load_dotenv
import os

In [12]:
load_dotenv()

class YouTubeAPIClient:
    def __init__(self):
        # Load and clean keys
        keys = os.getenv("YOUTUBE_API_KEYS")
        if not keys:
            raise ValueError("No API keys found in file.")
        self.api_keys = [key.strip() for key in keys.split(',')]
        self.index = 0
        self.youtube = self._build_client(self.api_keys[self.index])

    def _build_client(self, key):
        return build("youtube", "v3", developerKey=key)

    def _rotate_key(self):
        self.index += 1
        if self.index >= len(self.api_keys):
            raise Exception("All API keys exhausted.")
        print(f"Switching to next API key: {self.api_keys[self.index]}")
        self.youtube = self._build_client(self.api_keys[self.index])

    def execute(self, request):
        while True:
            try:
                return request.execute()
            except HttpError as e:
                error_res = e.resp.get('status')
                if error_res == 403 and 'quotaExceeded' in str(e):
                    print(f"Quota exhausted for API key {self.api_keys[self.index]}")
                    self._rotate_key()
                else:
                    raise e

    def get_client(self):
        return self.youtube

yt_client = YouTubeAPIClient()
youtube = yt_client.get_client()

In [13]:
CHANNELS = [
    {"id": "UCXIJgqnII2ZOINSWNOGFThA", "name": "Fox News"},
    {"id": "UCupvZG-5ko_eiXAupbDfxWw", "name": "CNN"},
    {"id": "UCBi2mrWuNuyYy4gbM6fU18Q", "name": "ABC News"},
    {"id": "UC16niRr50-MSBwiO3YDb3RA", "name": "BBC News"},
    {"id": "UCuFFtHWoLl5fauMMD5Ww2jA", "name": "CBC News"},
    {"id": "UCi7Zk9baY1tvdlgxIML8MXg", "name": "CTV News"},
    {"id": "UCwqusr8YDwM-3mEYTDeJHzw", "name": "Republic TV"},
    {"id": "UCt4atlExw8aj3Bm79nv1fig", "name": "Times Now"},
    {"id": "UCw8Fc5R0Q5yyS3KrDCPM3xg", "name": "Times Now World"},
    {"id": "UC_gUM8rL-Lrg6O3adPW9K1g", "name": "WION"},
    {"id": "UCYPvAwZP8pZhSMW8qs7cVCw", "name": "India Today"},
    {"id": "UCaXkIU1QidjPwiAYu6GcHjg", "name": "MSNBC"},
    {"id": "UCNye-wNBqNL5ZzHSJj3l8Bg", "name": "Al Jazeera English"},
    {"id": "UChLtXXpo4Ge1ReTEboVvTDg", "name": "Global News"},
]

In [14]:
# Step 2: Get Channel Statistics
def get_channel_stats(channel_id):
    try:
        request = youtube.channels().list(
            part='snippet,statistics',
            id=channel_id
        )
        response = request.execute()
        data = response['items'][0]
        stats = data['statistics']
        return {
            'channel_id': channel_id,
            'channel_title': data['snippet']['title'],
            'subscriber_count': int(stats.get('subscriberCount', 0)),
            'total_views': int(stats.get('viewCount', 0)),
            'video_count': int(stats.get('videoCount', 0))
        }
    except Exception as e:
        print(f"Error getting channel stats for {channel_id}: {e}")
        return None

In [15]:
# Step 3: Get Uploads Playlist ID
def get_uploads_playlist_id(channel_id):
    try:
        request = youtube.channels().list(
            part='contentDetails',
            id=channel_id
        )
        response = request.execute()
        return response['items'][0]['contentDetails']['relatedPlaylists']['uploads']
    except Exception as e:
        print(f"Error getting uploads playlist for {channel_id}: {e}")
        return None

In [16]:
# Step 4: Get Latest Video IDs
def get_latest_video_ids(playlist_id, max_videos=5):
    video_ids = []
    try:
        request = youtube.playlistItems().list(
            part='contentDetails',
            playlistId=playlist_id,
            maxResults=max_videos
        )
        response = request.execute()
        for item in response['items']:
            video_ids.append(item['contentDetails']['videoId'])
    except Exception as e:
        print(f"Error getting video IDs from playlist {playlist_id}: {e}")
    return video_ids

In [7]:
# Step 3: Get Uploads Playlist ID
def get_uploads_playlist_id(channel_id):
    try:
        request = youtube.channels().list(
            part='contentDetails',
            id=channel_id
        )
        response = request.execute()
        return response['items'][0]['contentDetails']['relatedPlaylists']['uploads']
    except Exception as e:
        print(f"Error getting uploads playlist for {channel_id}: {e}")
        return None

In [8]:
# Step 4: Get Latest Video IDs
def get_latest_video_ids(playlist_id, max_videos=5):
    video_ids = []
    try:
        request = youtube.playlistItems().list(
            part='contentDetails',
            playlistId=playlist_id,
            maxResults=max_videos
        )
        response = request.execute()
        for item in response['items']:
            video_ids.append(item['contentDetails']['videoId'])
    except Exception as e:
        print(f"Error getting video IDs from playlist {playlist_id}: {e}")
    return video_ids

In [17]:
# Step 5: Get Video Metadata
def get_video_stats(video_id):
    try:
        request = youtube.videos().list(
            part='snippet,statistics',
            id=video_id
        )
        response = request.execute()
        data = response['items'][0]
        stats = data['statistics']
        return {
            'video_id': video_id,
            'video_title': data['snippet']['title'],
            'video_published_at': data['snippet']['publishedAt'],
            'video_views': int(stats.get('viewCount', 0)),
            'video_likes': int(stats.get('likeCount', 0)),
            'video_comments': int(stats.get('commentCount', 0))
        }
    except Exception as e:
        print(f"Error getting stats for video {video_id}: {e}")
        return None

In [19]:
# === Run the full pipeline ===
def get_top_news_videos():
    final_data = []
    for channel in CHANNELS:
        channel_id = channel["id"]
        channel_title = channel["name"]
        channel_info = get_channel_stats(channel_id)
        if not channel_info:
            continue

        uploads_playlist = get_uploads_playlist_id(channel_id)
        if not uploads_playlist:
            continue

        video_ids = get_latest_video_ids(uploads_playlist, max_videos=5)
        for vid in video_ids:
            video_info = get_video_stats(vid)
            if video_info:
                final_data.append({
                    **channel_info,
                    **video_info
                })

    return pd.DataFrame(final_data)

df = get_top_news_videos()
df.to_csv("top_news_videos.csv", index=False)

In [27]:
df.head()

Unnamed: 0,channel_id,channel_title,subscriber_count,total_views,video_count,video_id,video_title,video_published_at,video_views,video_likes,video_comments
0,UCXIJgqnII2ZOINSWNOGFThA,Fox News,13600000,20493015999,119781,CKWOBbunhcw,Gutfeld: Dems are the Charlie Sheen of politics,2025-05-13T02:45:01Z,12799,870,109
1,UCXIJgqnII2ZOINSWNOGFThA,Fox News,13600000,20493015999,119781,ekTH_89tC3E,What's in your wallet?: How Trump's agreement ...,2025-05-13T02:00:43Z,11209,808,360
2,UCXIJgqnII2ZOINSWNOGFThA,Fox News,13600000,20493015999,119781,Ajmp2Q8JdTQ,Edan Alexander embraces family after months as...,2025-05-13T01:28:26Z,22420,2348,223
3,UCXIJgqnII2ZOINSWNOGFThA,Fox News,13600000,20493015999,119781,8vGC9kBli5Y,"RFK Jr. goes off on Elizabeth Warren, Dems: Tr...",2025-05-13T01:15:00Z,34849,3017,608
4,UCXIJgqnII2ZOINSWNOGFThA,Fox News,13600000,20493015999,119781,CkntI89I7RI,"Watters: The way things were going, America wa...",2025-05-13T00:30:05Z,66372,4378,742


In [10]:
# === Run the full pipeline ===
def get_top_news_videos():
    final_data = []
    channels = search_news_channels(max_results=100)

    for channel_id, channel_title in channels:
        channel_info = get_channel_stats(channel_id)
        if not channel_info:
            continue

        uploads_playlist = get_uploads_playlist_id(channel_id)
        if not uploads_playlist:
            continue

        video_ids = get_latest_video_ids(uploads_playlist, max_videos=5)
        for vid in video_ids:
            video_info = get_video_stats(vid)
            if video_info:
                final_data.append({
                    **channel_info,
                    **video_info
                })

    return pd.DataFrame(final_data)

In [11]:
df = get_top_news_videos()
df.to_csv("top_news_videos.csv", index=False)
df

NameError: name 'search_news_channels' is not defined

In [None]:
df['channel_title'].unique()