In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
from googleapiclient.errors import HttpError
from googleapiclient.discovery import build
from dotenv import load_dotenv
import os
from youtube_transcript_api import YouTubeTranscriptApi

In [2]:
# News channels of interest: one dict per channel with its YouTube channel ID
# ("id") and a human-readable label ("name").
# NOTE(review): not referenced by any of the cells visible in this part of the
# notebook — presumably iterated over later; confirm.
CHANNELS = [
    {"id": "UCXIJgqnII2ZOINSWNOGFThA", "name": "Fox News"},
    {"id": "UCupvZG-5ko_eiXAupbDfxWw", "name": "CNN"},
    {"id": "UCBi2mrWuNuyYy4gbM6fU18Q", "name": "ABC News"},
    {"id": "UC16niRr50-MSBwiO3YDb3RA", "name": "BBC News"},
    {"id": "UCuFFtHWoLl5fauMMD5Ww2jA", "name": "CBC News"},
    {"id": "UCi7Zk9baY1tvdlgxIML8MXg", "name": "CTV News"},
    {"id": "UCwqusr8YDwM-3mEYTDeJHzw", "name": "Republic TV"},
    {"id": "UCt4atlExw8aj3Bm79nv1fig", "name": "Times Now"},
    {"id": "UCw8Fc5R0Q5yyS3KrDCPM3xg", "name": "Times Now World"},
    {"id": "UC_gUM8rL-Lrg6O3adPW9K1g", "name": "WION"},
    {"id": "UCYPvAwZP8pZhSMW8qs7cVCw", "name": "India Today"},
    {"id": "UCaXkIU1QidjPwiAYu6GcHjg", "name": "MSNBC"},
    {"id": "UCNye-wNBqNL5ZzHSJj3l8Bg", "name": "Al Jazeera English"},
    {"id": "UChLtXXpo4Ge1ReTEboVvTDg", "name": "Global News"},
]

In [3]:
# Load variables from a local .env file into the process environment so that
# the os.getenv("YOUTUBE_API_KEYS") lookup in YouTubeAPIClient can find the keys.
load_dotenv()

class YouTubeAPIClient:
    """YouTube Data API v3 client wrapper that rotates through several API keys.

    Keys are read from the ``YOUTUBE_API_KEYS`` environment variable as a
    comma-separated list. ``execute`` runs a request and, when the API answers
    with HTTP 403 ``quotaExceeded``, switches to the next key and retries.

    Attributes:
        api_keys: list of non-empty, whitespace-stripped API keys.
        index: position in ``api_keys`` of the key currently in use.
        youtube: service object built with the key at ``index``.
    """

    def __init__(self):
        # Load and clean keys; blank entries (e.g. from a trailing comma) are dropped
        # so they can never be handed to the API as an empty key.
        keys = os.getenv("YOUTUBE_API_KEYS")
        if not keys:
            raise ValueError("No API keys found in file.")
        self.api_keys = [key.strip() for key in keys.split(',') if key.strip()]
        if not self.api_keys:
            raise ValueError("No API keys found in file.")
        self.index = 0
        self.youtube = self._build_client(self.api_keys[self.index])

    def _build_client(self, key):
        """Build a YouTube Data API v3 service object that uses ``key``."""
        return build("youtube", "v3", developerKey=key)

    @staticmethod
    def _mask(key):
        """Return a log-safe form of ``key`` that reveals at most its last 4 characters."""
        return f"...{key[-4:]}" if len(key) > 4 else "****"

    @staticmethod
    def _http_status(error):
        """Return the HTTP status of ``error`` as an int, or None if unavailable.

        The response object exposes the status both as an attribute and as a
        mapping entry; the mapping entry is a string, so the value is always
        normalised through ``int`` before it is compared.
        """
        resp = getattr(error, "resp", None)
        raw = getattr(resp, "status", None)
        if raw is None and hasattr(resp, "get"):
            raw = resp.get("status")
        try:
            return int(raw)
        except (TypeError, ValueError):
            return None

    def _use_active_key(self, request):
        """Make an already-built request carry the currently active API key.

        A request built from a service object created with ``developerKey``
        has that key baked into the ``key`` query parameter of ``request.uri``.
        Without this step a request built before a rotation (or from a stale
        service object) would keep sending the exhausted key. Requests without
        a string ``uri`` or without a ``key`` parameter are left untouched.
        """
        from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit

        uri = getattr(request, "uri", None)
        if not isinstance(uri, str):
            return
        parts = urlsplit(uri)
        params = parse_qsl(parts.query, keep_blank_values=True)
        active = self.api_keys[self.index]
        if not any(name == "key" and value != active for name, value in params):
            return
        swapped = [(name, active if name == "key" else value) for name, value in params]
        request.uri = urlunsplit(parts._replace(query=urlencode(swapped)))

    def _rotate_key(self):
        """Switch to the next API key and rebuild ``self.youtube``.

        Raises:
            RuntimeError: when there is no further key. ``self.index`` is left
                pointing at the last valid key in that case.
        """
        if self.index + 1 >= len(self.api_keys):
            raise RuntimeError("All API keys exhausted.")
        self.index += 1
        # Never print the full key: notebook output is saved and shared.
        print(f"Switching to next API key: {self._mask(self.api_keys[self.index])}")
        self.youtube = self._build_client(self.api_keys[self.index])

    def execute(self, request):
        """Execute ``request``, rotating API keys when the quota is exhausted.

        Args:
            request: either a built request object exposing ``execute()``, or a
                callable that takes the current service object and returns such
                a request (it is called again after every key rotation).

        Returns:
            Whatever the request's ``execute()`` returns.

        Raises:
            HttpError: for any HTTP error other than 403 ``quotaExceeded``.
            RuntimeError: when every configured key has run out of quota.
        """
        while True:
            current = request(self.youtube) if callable(request) else request
            self._use_active_key(current)
            try:
                return current.execute()
            except HttpError as e:
                if self._http_status(e) == 403 and 'quotaExceeded' in str(e):
                    print(f"Quota exhausted for API key {self._mask(self.api_keys[self.index])}")
                    self._rotate_key()
                else:
                    raise

    def get_client(self):
        """Return the service object for the key that is active right now.

        The returned object is replaced on key rotation, so callers should call
        this again rather than hold on to an earlier result.
        """
        return self.youtube

# Shared key-rotating client used by the helper functions in the cells below.
yt_client = YouTubeAPIClient()
# NOTE: `youtube` is a snapshot of the service object for the key active right
# now. A key rotation assigns a new object to yt_client.youtube, which this
# module-level name does not follow.
youtube = yt_client.get_client()

In [4]:
# Step 2: Get Channel Statistics
def get_channel_stats(channel_id):
    """Fetch the title and headline statistics of one YouTube channel.

    Args:
        channel_id: channel ID passed as ``id`` to ``channels().list``.

    Returns:
        A dict with ``channel_id``, ``channel_title``, ``subscriber_count``,
        ``total_views`` and ``video_count`` (counts as ints, 0 when the field is
        absent from the response), or None when the response contains no
        channel or the request fails (a message is printed in both cases).
    """
    try:
        # Build from the client's current service object and run through
        # yt_client.execute so quota-driven key rotation applies here as well.
        request = yt_client.get_client().channels().list(
            part='snippet,statistics',
            id=channel_id
        )
        response = yt_client.execute(request)

        items = response.get('items') or []
        if not items:
            # Unknown channel: report it explicitly rather than via a KeyError.
            print(f"No data found for channel ID: {channel_id}")
            return None

        data = items[0]
        stats = data.get('statistics', {})
        return {
            'channel_id': channel_id,
            'channel_title': data['snippet']['title'],
            'subscriber_count': int(stats.get('subscriberCount', 0)),
            'total_views': int(stats.get('viewCount', 0)),
            'video_count': int(stats.get('videoCount', 0))
        }
    except Exception as e:
        # Best effort: callers get None instead of an exception.
        print(f"Error getting channel stats for {channel_id}: {e}")
        return None

In [5]:
# Smoke test: this is the channel ID listed as "India Today" in CHANNELS.
get_channel_stats('UCYPvAwZP8pZhSMW8qs7cVCw')

{'channel_id': 'UCYPvAwZP8pZhSMW8qs7cVCw',
 'channel_title': 'India Today',
 'subscriber_count': 10400000,
 'total_views': 4827499456,
 'video_count': 236181}

In [6]:
# Capture video details

def get_video_data(video_id):
    """Fetch snippet and statistics fields for a single video.

    Args:
        video_id: video ID passed as ``id`` to ``videos().list``.

    Returns:
        A dict with ``video_id``, ``title``, ``description``, ``published_at``,
        ``views``, ``likes``, ``dislikes`` and ``comment_count`` (counts as ints,
        0 when the field is absent from the response), or None when no video
        is returned or the request fails (a message is printed in both cases).
    """
    try:
        # Build from the client's current service object and run through
        # yt_client.execute so quota-driven key rotation applies here as well.
        request = yt_client.get_client().videos().list(
            part="snippet,statistics",
            id=video_id
        )
        response = yt_client.execute(request)

        items = response.get('items') or []
        if not items:
            print(f"No data found for video ID: {video_id}")
            return None

        video = items[0]
        snippet = video['snippet']
        statistics = video.get('statistics', {})
        return {
            'video_id': video_id,
            'title': snippet['title'],
            'description': snippet['description'],
            'published_at': snippet['publishedAt'],
            'views': int(statistics.get('viewCount', 0)),
            'likes': int(statistics.get('likeCount', 0)),
            # The API only returns dislikeCount to the video's owner, so with a
            # plain API key this falls back to 0; it does not mean "no dislikes".
            # Kept so the returned schema stays unchanged.
            'dislikes': int(statistics.get('dislikeCount', 0)),
            'comment_count': int(statistics.get('commentCount', 0))
        }
    except Exception as e:
        # Best effort: callers get None instead of an exception.
        print(f"Error getting video data for {video_id}: {e}")
        return None

In [10]:
# Spot-check the helper on a single sample video.
get_video_data('hLG0tvZ7hi0')

{'video_id': 'hLG0tvZ7hi0',
 'title': 'US President Donald Trump arrives in Saudi Arabia, meets MBS',
 'description': "US President Donald Trump’s plane has touched down in Riyadh on first stop in three-country trip.\nSaudi Crown Prince Mohammed bin Salman has welcomed Trump at the tarmac of the airport in Riyadh.\n\nAl Jazeera's Kimberly Halkett and Hashem Ahelbarra is monitoring developments in Riyadh, Saudi Arabia. \n\nSubscribe to our channel: http://bit.ly/AJSubscribe\nFollow us on X: https://x.com/ajenglish\nFind us on Facebook: https://www.facebook.com/aljazeera\nCheck our website:  https://www.aljazeera.com\nCheck out our Instagram page: https://www.instagram.com/aljazeeraenglish\nDownload AJE Mobile App: https://aje.io/AJEMobile\n\n#DonaldTrump #SaudiArabia #Riyadh #PrinceMohammedbinSalman #MBS #TrumpGulfVisit #MiddleEast #AlJazeeraEnglish",
 'published_at': '2025-05-13T07:31:42Z',
 'views': 432855,
 'likes': 4351,
 'dislikes': 0,
 'comment_count': 1079}

In [9]:
# Sample video ID used by the example calls in this notebook.
video_id='hLG0tvZ7hi0'
def get_captions(video_id, languages=('en',)):
    """Return the caption text of a video as a list of lines.

    Args:
        video_id: ID of the video whose transcript is requested.
        languages: language code, or sequence of codes in order of preference,
            forwarded to the transcript API. Defaults to English only.

    Returns:
        The text of every transcript entry, in order. An empty list when the
        transcript cannot be retrieved (the error is printed).
    """
    # Accept a bare string so that "en" is not split into ['e', 'n'].
    wanted = [languages] if isinstance(languages, str) else list(languages)
    try:
        if hasattr(YouTubeTranscriptApi, "fetch"):
            # Newer youtube-transcript-api releases: instance-based API whose
            # result yields snippet objects with a `.text` attribute.
            fetched = YouTubeTranscriptApi().fetch(video_id, languages=wanted)
            return [snippet.text for snippet in fetched]
        # Older releases: static call returning a list of dicts.
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=wanted)
        return [entry['text'] for entry in transcript]
    except Exception as e:
        # Best effort: missing or disabled captions should not stop the notebook.
        print(f"Error getting transcript for {video_id}: {e}")
        return []

# Fetch the captions of the sample video and print them one line at a time.
# get_captions returns an empty list on failure, so nothing is printed then.
captions = get_captions(video_id)
for line in captions:
    print(line)

Let's now go to our White House
correspondent Kimbley Halit. She is
normally in Washington DC, but today she
is in Riad to follow this visit. And
Kimbley, we're expecting to see the
Saudi Crown Prince Muhammad bin Salman
uh at the international airport there in
Riyad where US President Donald Trump
has just touched down. Uh Trump can
expect a full red carpet welcome, can't
he?
Yeah, literally the red carpet is
expected to be rolled out for Donald
Trump. He is expected to have a very
similar lavish welcoming ceremony much
like we saw in 2017. The sword dance is
expected to be part of that traditional
welcoming ceremony. He was uh offered a
a gold medal, one of the highest
civilian honors the f during his first
term. We expect something similar again
this time. But in terms of all of the
opulence that will be afforded to Donald
Trump, this is something that Donald
Trump likes. But at the same time, he's
also heavily criticized back in the
United States for the fact that he has a
very clo

In [11]:
def _flatten_thread(item):
    """Reduce one comment-thread item to the fields this notebook keeps."""
    thread = item["snippet"]
    top = thread["topLevelComment"]
    top_snippet = top["snippet"]
    return {
        "comment_id": top["id"],
        "author": top_snippet.get("authorDisplayName"),
        "text": top_snippet.get("textDisplay"),
        "like_count": top_snippet.get("likeCount"),
        "published_at": top_snippet.get("publishedAt"),
        "reply_count": thread.get("totalReplyCount", 0),
        # Only the replies embedded in the thread item are captured.
        # NOTE(review): the API may embed just a subset of a thread's replies,
        # so len(replies) can be smaller than reply_count — confirm if complete
        # reply coverage is required.
        "replies": [
            {
                "author": reply["snippet"].get("authorDisplayName"),
                "text": reply["snippet"].get("textDisplay"),
                "like_count": reply["snippet"].get("likeCount"),
                "published_at": reply["snippet"].get("publishedAt"),
            }
            for reply in item.get("replies", {}).get("comments", [])
        ],
    }


def get_video_comments(video_id, max_comments=500):
    """Fetch up to ``max_comments`` top-level comments (with embedded replies).

    Args:
        video_id: ID of the video whose comment threads are listed.
        max_comments: upper bound on the number of top-level comments returned;
            replies do not count towards it.

    Returns:
        A list of dicts with ``comment_id``, ``author``, ``text``,
        ``like_count``, ``published_at``, ``reply_count`` and ``replies`` (a
        list of dicts with ``author``, ``text``, ``like_count``,
        ``published_at``). If a page fails, the error is printed and the
        comments collected so far are returned.
    """
    comments = []
    next_page_token = None

    while len(comments) < max_comments:
        try:
            # Ask yt_client for its service object on every page: after a key
            # rotation it holds a new one, whereas a reference captured earlier
            # would keep building requests with the exhausted key.
            request = yt_client.get_client().commentThreads().list(
                part="snippet,replies",
                videoId=video_id,
                maxResults=100,
                pageToken=next_page_token,
                textFormat="plainText"
            )
            response = yt_client.execute(request)

            for item in response.get("items", []):
                comments.append(_flatten_thread(item))
                if len(comments) >= max_comments:
                    break

            next_page_token = response.get("nextPageToken")
            if not next_page_token:
                break

        except Exception as e:
            # Best effort: keep what was fetched before the failure.
            print(f"Error fetching comments: {e}")
            break

    return comments

In [12]:
# Fetch up to 300 top-level comments for the sample video and report how many
# were actually returned.
video_id = "hLG0tvZ7hi0"
comments = get_video_comments(video_id, max_comments=300)
print(f"Total comments fetched: {len(comments)}")

Total comments fetched: 300


In [14]:
def comments_to_dataframe(comments):
    """Flatten comment threads into a DataFrame with one row per comment or reply.

    Each top-level comment yields a row of type ``"comment"``, immediately
    followed by one row of type ``"reply"`` for each entry in its ``"replies"``
    list (if present). Columns are ``username``, ``comment`` and ``type``.
    """
    def _rows_for(thread):
        # Parent comment first, then its replies in their stored order.
        yield {
            "username": thread["author"],
            "comment": thread["text"],
            "type": "comment",
        }
        for child in thread.get("replies", []):
            yield {
                "username": child["author"],
                "comment": child["text"],
                "type": "reply",
            }

    records = [record for thread in comments for record in _rows_for(thread)]
    return pd.DataFrame(records)

In [25]:
# Flatten the fetched comment threads into a DataFrame and preview the first rows.
# NOTE(review): this repeats the fetch from the earlier cell (same video_id,
# same max_comments); reusing the existing `comments` would avoid the extra
# API calls.
video_id = "hLG0tvZ7hi0"
comments = get_video_comments(video_id, max_comments=300)

df_comments = comments_to_dataframe(comments)
df_comments.head()

Unnamed: 0,username,comment,type
0,@mdfahidahmmed320,Gaza cries. Blood flows where prayers once ech...,comment
1,@xvizcaino4,Best President ever . God bless you Donald Tru...,comment
2,@jamesbomd3503,I don't think Al Jazeera should worry about Do...,comment
3,@tomlaz2039,And he walked away with a trillion dollar deal...,comment
4,@TafLOVERohayla,🤔You meant the United state airplane ✈️ is obv...,comment


In [28]:
# Show only the rows that came from replies (type == 'reply').
df_comments[df_comments['type'] == 'reply']

Unnamed: 0,username,comment,type
30,@SuzetteSmetka,"Yeah, from one murderer to another. \nYou fool.",reply
77,@rattata30,I’m Catholic you are my brother in Christ! Hop...,reply
78,@fa1rumi,@@rattata30 May there be peace and happiness f...,reply
80,@rattata30,Not only that but the disrespect of not giving...,reply
129,@meganalves9967,He’s not it’s called trade & deals,reply
130,@Ibn.Hadarmuud.,@@meganalves9967 why doesn’t Trump get the onl...,reply
208,@beyesteadfast8488,🇺🇸🇺🇸🇺🇸🇺🇸🇺🇸🇺🇸🇺🇸,reply
209,@hubertmatos5920,"Drink water, too much poison in you.",reply
210,@MrAdam2853,Its a business dead,reply
214,@sheila-we7em,IT'S NOT COKE COLA😁😁,reply
