In [2]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from googleapiclient.discovery import build

warnings.filterwarnings("ignore")


In [16]:
# Read the YouTube Data API key from the YOUTUBE_API_KEY environment variable
# instead of storing it in the notebook. A key that has ever been committed or
# shared in a notebook should be treated as compromised and rotated.
API_KEY = os.environ.get('YOUTUBE_API_KEY', '')
if not API_KEY:
    # Surface the misconfiguration here rather than as an opaque API error later.
    print('YOUTUBE_API_KEY is not set; YouTube API requests will be rejected.')

def get_trending_videos(api_key, max_results=200):
    """Fetch the most popular ("trending") US videos from the YouTube Data API.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        max_results: Maximum number of videos to return. A value <= 0 returns
            an empty list without contacting the API.

    Returns:
        A list of at most ``max_results`` dicts, one per video, with the keys
        ``video_id``, ``title``, ``description``, ``published_at``,
        ``channel_id``, ``channel_title``, ``category_id``, ``tags``,
        ``duration``, ``definition``, ``caption`` and the integer counters
        ``view_count``, ``like_count``, ``dislike_count``, ``favorite_count``
        and ``comment_count`` (0 when the API omits a counter).
    """
    if max_results <= 0:
        return []

    # build the youtube service
    youtube = build('youtube', 'v3', developerKey=api_key)

    # fetch the most popular videos; a page holds at most 50 items, and there
    # is no point requesting more than the caller asked for
    request = youtube.videos().list(
        part='snippet,contentDetails,statistics',
        chart='mostPopular',
        regionCode='US',
        maxResults=min(max_results, 50)
    )

    videos = []

    # list_next() returns None once there are no further pages
    while request is not None and len(videos) < max_results:
        response = request.execute()
        for item in response.get('items', []):
            snippet = item['snippet']
            content = item['contentDetails']
            # statistics can be absent or partial (e.g. hidden likes/comments)
            stats = item.get('statistics', {})
            videos.append({
                'video_id': item['id'],
                'title': snippet['title'],
                'description': snippet['description'],
                'published_at': snippet['publishedAt'],
                'channel_id': snippet['channelId'],
                'channel_title': snippet['channelTitle'],
                'category_id': snippet['categoryId'],
                'tags': snippet.get('tags', []),
                'duration': content['duration'],
                'definition': content['definition'],
                'caption': content.get('caption', 'false'),
                # the API delivers counters as strings; normalise to int so
                # present and defaulted values share one type
                'view_count': int(stats.get('viewCount', 0)),
                'like_count': int(stats.get('likeCount', 0)),
                # NOTE: dislike counts are no longer exposed publicly, so this
                # is normally 0; the key is kept for schema compatibility
                'dislike_count': int(stats.get('dislikeCount', 0)),
                'favorite_count': int(stats.get('favoriteCount', 0)),
                'comment_count': int(stats.get('commentCount', 0))
            })

        # get the request for the next page (None when exhausted)
        request = youtube.videos().list_next(request, response)

    # the last page may have pushed us past the limit
    return videos[:max_results]

def save_to_csv(data, filename):
    """Write ``data`` (a list of record dicts) to ``filename`` as CSV.

    The records are loaded into a DataFrame and written without the
    DataFrame's row index column.
    """
    pd.DataFrame(data).to_csv(filename, index=False)

def main():
    """Download the trending videos and store them in a CSV file."""
    filename = 'trending_videos.csv'
    save_to_csv(get_trending_videos(API_KEY), filename)
    print(f'Trending videos saved to {filename}')


# Run the download when this code is executed directly.
if __name__ == '__main__':
    main()

Trending videos saved to trending_videos.csv


In [17]:
# Load the CSV that main() wrote, using the same relative filename rather than
# a machine-specific absolute path so the notebook runs on any machine.
df = pd.read_csv('trending_videos.csv')

# Preview the first rows to sanity-check the columns and values.
df.head()

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,tags,duration,definition,caption,view_count,like_count,dislike_count,favorite_count,comment_count
0,SD2UiX1jpd8,IS THIS THE BEST BRAWLER EVER?!,"IMPROVED REWARDS FOR F2P, A FULL RANKED REWORK...",2025-02-22T16:00:06Z,UCooVYzDxdwTtGYAkcPmOgOw,Brawl Stars,20,"['brawl stars', 'mobile game', 'mobile strateg...",PT10M9S,hd,True,8399805,423782,0,0,42283
1,XGiv1gPrBtA,MY DAUGHTER SKIPPED SCHOOL!,Are you Team Salish or Team Jordan? If you wan...,2025-02-22T15:00:07Z,UCKaCalz5N5ienIbfPzEbYuA,Jordan Matter,24,"['salish matter', 'jordan matter', 'mall', 'sc...",PT27M19S,hd,False,2411769,55745,0,0,11692
2,xKHViBCHdxA,NO TIMER Fortnite XP Map to LEVEL UP in Chapte...,NO TIMER Fortnite XP Map to LEVEL UP in Chapte...,2025-02-22T12:01:20Z,UC-NHNPxSatoFzyrrvZIBuJg,Rajib,22,"['xp', 'xp glitch fortnite', 'fortnite xp', 'f...",PT1M27S,hd,False,119057,2147,0,0,688
3,-VASEkkjLF4,ESCAPE Evil STEPDAD Obby! (Roblox),🌼If you're reading this have an amazing day! :...,2025-02-16T14:15:00Z,UCAy3SbSn9988JS8KCQreF9A,Cutie The Bunny,20,"['roblox', 'minecraft', 'gaming', 'adopt me', ...",PT14M8S,hd,False,217039,509,0,0,1201
4,Ixj6pbyV_Ac,Fisch Mariana's Veil Update Full Guide | How T...,Complete Marian's Veil Quest Event! Magma Levi...,2025-02-22T13:25:18Z,UCMvBbCUnlCeoaiQabtJ_sQQ,DatBrian,20,"['Roblox', 'FISCH', 'roblox', 'Fisch', 'Roblox...",PT19M3S,hd,False,360804,3473,0,0,512


In [18]:
# Print each column's dtype and non-null count to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   video_id        200 non-null    object
 1   title           200 non-null    object
 2   description     198 non-null    object
 3   published_at    200 non-null    object
 4   channel_id      200 non-null    object
 5   channel_title   200 non-null    object
 6   category_id     200 non-null    int64 
 7   tags            200 non-null    object
 8   duration        200 non-null    object
 9   definition      200 non-null    object
 10  caption         200 non-null    bool  
 11  view_count      200 non-null    int64 
 12  like_count      200 non-null    int64 
 13  dislike_count   200 non-null    int64 
 14  favorite_count  200 non-null    int64 
 15  comment_count   200 non-null    int64 
dtypes: bool(1), int64(6), object(9)
memory usage: 23.8+ KB


In [19]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,category_id,view_count,like_count,dislike_count,favorite_count,comment_count
count,200.0,200.0,200.0,200.0,200.0,200.0
mean,18.18,3917526.0,133187.5,0.0,0.0,7624.24
std,6.32198,11615340.0,419684.7,0.0,0.0,17824.419443
min,1.0,96010.0,0.0,0.0,0.0,0.0
25%,10.0,699332.5,18717.25,0.0,0.0,1488.75
50%,20.0,1381669.0,42982.5,0.0,0.0,3317.0
75%,24.0,3096431.0,103381.5,0.0,0.0,7148.0
max,28.0,133881300.0,4419589.0,0.0,0.0,166627.0
