In [1]:
# Libraries Required
import ast
import csv
import os

import isodate
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from googleapiclient.discovery import build
print("Libraries Imported Successfully")

Libraries Imported Successfully


In [2]:
# YouTube Data API v3 key.
# The key is read from the YOUTUBE_API_KEY environment variable so that the
# secret is never stored in the notebook or committed to version control.
# NOTE(review): a key was previously hardcoded in this cell; treat it as
# exposed and revoke/rotate it.
API_KEY = os.environ.get('YOUTUBE_API_KEY', '')
if API_KEY:
    print("API Key Set Successfully")
else:
    # Do not claim success when nothing was configured
    print("API key not found: set the YOUTUBE_API_KEY environment variable and re-run this cell")

API Key Set Successfully


In [3]:
# Function to get the trending videos of 5 specific regions AU(Australia), CA(Canada), GB(United Kingdom), IN(India), US(United States) and saving them in CSV files

# Column order of every per-region CSV. It is fixed up front (instead of being
# read from the last fetched row) so a region that returns no videos still gets
# a valid header-only file.
_VIDEO_CSV_FIELDS = [
    'video_id', 'title', 'description', 'published_at', 'channel_id', 'channel_title',
    'category_id', 'category_name', 'tags', 'duration', 'definition', 'caption',
    'view_count', 'like_count', 'dislike_count', 'favorite_count', 'comment_count',
]


def _extract_video_details(item, category_map):
    """Flatten one item of a `videos().list` response into a row dict keyed by `_VIDEO_CSV_FIELDS`."""
    snippet = item['snippet']
    content = item['contentDetails']
    stats = item.get('statistics', {})
    return {
        'video_id': item['id'],
        'title': snippet['title'],
        'description': snippet['description'],
        'published_at': snippet['publishedAt'],
        'channel_id': snippet['channelId'],
        'channel_title': snippet['channelTitle'],
        'category_id': snippet['categoryId'],
        # Category ids without a title in this region's category list fall back to 'Unknown'
        'category_name': category_map.get(snippet['categoryId'], 'Unknown'),
        'tags': snippet.get('tags', []),
        'duration': content['duration'],
        'definition': content['definition'],
        'caption': content.get('caption', 'false'),
        # Counts are stored exactly as the API returns them; 0 is used when a count is absent
        'view_count': stats.get('viewCount', 0),
        'like_count': stats.get('likeCount', 0),
        # NOTE(review): every row in the saved data has dislike_count == 0; the public API
        # appears to no longer expose dislikeCount - confirm before analysing this column
        'dislike_count': stats.get('dislikeCount', 0),
        'favorite_count': stats.get('favoriteCount', 0),
        'comment_count': stats.get('commentCount', 0),
    }


def get_trending_videos(api_key, region_codes=('AU', 'CA', 'GB', 'IN', 'US'), max_results=200):
    """Fetch the most popular videos of each region and save them to one CSV per region.

    For every region code the category titles are looked up first, then the
    `mostPopular` chart is paged through until `max_results` rows are collected
    or the API has no further page. Each region is written to
    `trending_videos_<REGION>.csv` in the current working directory.

    Args:
        api_key: YouTube Data API v3 developer key.
        region_codes: Iterable of region codes to query.
        max_results: Upper bound on the number of videos kept per region.

    Returns:
        dict mapping each region code to the list of row dicts collected for it.

    Raises:
        ValueError: If `api_key` is empty.
    """
    if not api_key:
        raise ValueError("api_key is empty; provide a YouTube Data API v3 key")

    region_codes = list(region_codes)

    # Initialize an empty dictionary to store videos by region
    videos_by_region = {region: [] for region in region_codes}

    # Youtube API connection build object
    youtube = build('youtube', 'v3', developerKey=api_key)

    for youtube_region_code in region_codes:
        # Getting youtube categories Names
        categories_response = youtube.videoCategories().list(part='snippet', regionCode=youtube_region_code).execute()
        category_map = {item['id']: item['snippet']['title'] for item in categories_response.get('items', [])}

        region_videos = videos_by_region[youtube_region_code]

        # Fetching the most popular videos, 50 per page
        request = youtube.videos().list(
            part='snippet,contentDetails,statistics',
            chart='mostPopular',
            regionCode=youtube_region_code,
            maxResults=50
        )

        # Paginating through the results getting the details and storing them in the list
        while request is not None and len(region_videos) < max_results:
            response = request.execute()
            for item in response.get('items', []):
                # Stop mid-page so the result never exceeds max_results
                if len(region_videos) >= max_results:
                    break
                region_videos.append(_extract_video_details(item, category_map))

            # list_next returns None when there is no further page
            request = youtube.videos().list_next(request, response)

        # Write the data to a CSV file
        filename = f"trending_videos_{youtube_region_code}.csv"
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=_VIDEO_CSV_FIELDS)
            writer.writeheader()
            writer.writerows(region_videos)

    # Returning the videos by region
    return videos_by_region

# Fetch the trending videos for the default regions using the configured API key.
# The call also writes one trending_videos_<REGION>.csv file per region.
trending_videos = get_trending_videos(API_KEY)
print("Trending Videos Fetched Successfully")

Trending Videos Fetched Successfully


In [4]:
# Load the saved AU (Australia) trending-videos CSV into a DataFrame and preview its first rows
trending_videos_AU = pd.read_csv('trending_videos_AU.csv')
trending_videos_AU.head()

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,definition,caption,view_count,like_count,dislike_count,favorite_count,comment_count
0,wxZP0bdXYUU,Robert Whittaker knocks out Ikram Aliskerov in...,Watch as Robert Whittaker knocks out Ikram Ali...,2024-06-22T21:44:48Z,UCO4AcsPKEkIqDmbeiZLfd1A,ESPN MMA,17,Sports,"['Alexander Volkov', 'Sergey Pavlovich', 'robe...",PT1M3S,hd,False,271432,5097,0,0,904
1,Z7B7PpTOpDE,Aussie Tries American Fast Food for the First ...,Join me as I try American fast food for the fi...,2024-06-23T07:30:15Z,UCecAIXPb5KTJz5BFnUzlTaA,Spanian,24,Entertainment,"['Spanian', 'action bronson', 'american', 'bes...",PT40M38S,hd,False,358864,11605,0,0,2267
2,bmzFk5-TT3w,Hiring a DJ for a Bikies Funeral PRANK,Thanks to Danny Rant's & Billboard for helping...,2024-06-22T21:00:24Z,UCEpHkpv4_CgZIEadjjOv4jA,Misfit Minds,22,People & Blogs,[],PT12M44S,hd,False,149033,8950,0,0,395
3,Sfpr_S8nVLA,I made the worlds most powerful soccer shoe,Get a free 14-day trial of Odoo's all-in-one b...,2024-06-22T15:00:22Z,UCJLZe_NoiG0hT7QCX_9vmqw,I did a thing,24,Entertainment,[],PT21M30S,hd,False,2093348,116859,0,0,4269
4,U0Mq3mJdwh4,MEGA BOXES ARE BACK!!!,This episode of Brawl Talk brings back the mos...,2024-06-22T15:00:18Z,UCooVYzDxdwTtGYAkcPmOgOw,Brawl Stars,20,Gaming,"['brawl stars', 'supercell game', 'megabox', '...",PT8M53S,hd,True,25738337,1306402,0,0,165529


In [5]:
# Load the saved CA (Canada) trending-videos CSV into a DataFrame and preview its first rows
trending_videos_CA = pd.read_csv('trending_videos_CA.csv')
trending_videos_CA.head()

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,definition,caption,view_count,like_count,dislike_count,favorite_count,comment_count
0,U0Mq3mJdwh4,MEGA BOXES ARE BACK!!!,This episode of Brawl Talk brings back the mos...,2024-06-22T15:00:18Z,UCooVYzDxdwTtGYAkcPmOgOw,Brawl Stars,20,Gaming,"['brawl stars', 'supercell game', 'megabox', '...",PT8M53S,hd,True,25738337,1306402,0,0,165530
1,yOS-bkcpsXo,Buffs & Nerfs | Mega Box Rates | Skin Animatio...,"Update Info! Balance Changes, Mega Box Rates, ...",2024-06-23T15:00:50Z,UCmG2EhfOwSjpPMX4LjGY__A,KairosTime Gaming,20,Gaming,"['Brawl Stars', 'BrawlStars', 'Kairos', 'Kairo...",PT23M23S,hd,False,611218,26501,0,0,3367
2,wtA8ufYCM5c,House of the Dragon S2E01 Explained,Get Nebula using my link for 40% off an annual...,2024-06-23T14:16:22Z,UCveZqqGewoyPiacooywP5Ig,Alt Shift X,27,Education,[],PT43M10S,hd,True,406877,18594,0,0,1413
3,A4ZbdU31KbQ,skibidi toilet 75,"new enemies, new friends\n\nfull-screen versio...",2024-06-24T00:00:04Z,UCsSsgPaZ2GSmO6il8Cb5iGA,DaFuq!?Boom!,24,Entertainment,"['sfm animation', 'skibidi toilet vs cameramen...",PT3M52S,hd,False,7950444,653465,0,0,53764
4,E_sVLtTODRk,Chicago Sky vs. Indiana Fever | FULL GAME HIGH...,The Chicago Sky and Indiana Fever competed in ...,2024-06-23T22:31:54Z,UCO9a_ryN_l7DIDS-VIt-zmw,WNBA,17,Sports,"['wnba', 'nba', 'basketball', 'women', 'highli...",PT9M30S,hd,False,361392,5377,0,0,2173


In [6]:
# Load the saved GB (United Kingdom) trending-videos CSV into a DataFrame and preview its first rows
trending_videos_GB = pd.read_csv('trending_videos_GB.csv')
trending_videos_GB.head()

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,definition,caption,view_count,like_count,dislike_count,favorite_count,comment_count
0,e3HpmhBlKNQ,Mazza L20 - Fire in the Booth,"Mazza L20 is in the studio in London, UK for h...",2024-06-23T15:00:08Z,UCB5-9DHv6C6WMJOksBocwJQ,CharlieSloth,10,Music,"['charlie sloth', 'apple music', 'fire in the ...",PT10M55S,hd,False,233487,14097,0,0,1920
1,BhwpIUm34n4,GUESS THE MUSICIAN!,Thank you to Azar for sponsoring this video! \...,2024-06-23T16:27:41Z,UCWZmCMB7mmKWcXJSIPRhzZw,Miniminter,24,Entertainment,"['simon', 'sidemen', 'miniminter', 'mm7games',...",PT31M37S,hd,False,396409,22628,0,0,676
2,3nB688xBYdY,There are NOT 195 countries,Go to https://ground.news/mapmen to get the wo...,2024-06-23T14:59:38Z,UCbbQalJ4OaC0oQ0AqRaOJ9g,Jay Foreman,23,Comedy,[],PT12M16S,hd,True,598256,53104,0,0,3425
3,wtA8ufYCM5c,House of the Dragon S2E01 Explained,Get Nebula using my link for 40% off an annual...,2024-06-23T14:16:22Z,UCveZqqGewoyPiacooywP5Ig,Alt Shift X,27,Education,[],PT43M10S,hd,True,406877,18594,0,0,1413
4,U0Mq3mJdwh4,MEGA BOXES ARE BACK!!!,This episode of Brawl Talk brings back the mos...,2024-06-22T15:00:18Z,UCooVYzDxdwTtGYAkcPmOgOw,Brawl Stars,20,Gaming,"['brawl stars', 'supercell game', 'megabox', '...",PT8M53S,hd,True,25738337,1306404,0,0,165530


In [7]:
# Load the saved IN (India) trending-videos CSV into a DataFrame and preview its first rows
trending_videos_IN = pd.read_csv('trending_videos_IN.csv')
trending_videos_IN.head()

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,definition,caption,view_count,like_count,dislike_count,favorite_count,comment_count
0,nJDclWEjGPA,#TheGOATBdayShots | Thalapathy Vijay | Venkat ...,A special GOATbdayshots from the film The Grea...,2024-06-21T18:31:00Z,UC9WXzTgk10ncJX1eOxHElCg,AGS Entertainment,1,Film & Animation,"['Thalapathy Vijay', 'Thalapathy Vijay birthda...",PT51S,hd,False,8669183,516527,0,0,12222
1,4P_k0rqmyX8,Chinna Chinna Kangal (Lyrical) | The Greatest ...,Get ready to melt in this soul stirring melody...,2024-06-22T12:02:01Z,UCq-Fj5jknLsUf-MWSy4_brA,T-Series,10,Music,"['hindi songs 2024', 'hindi songs new', 'bolly...",PT4M42S,hd,False,5957163,567811,0,0,19696
2,G4u0jS6SRTw,Armaan को पड़ा Kritika और Payal पर Comment मारन...,📲 Download JioCinema: https://bit.ly/3Be09Z3 \...,2024-06-23T13:30:11Z,UC8To9CFsZzvPafxMLzS08iA,JioCinema,24,Entertainment,"['bigg boss ott 3', 'bigg boss ott 3 contestan...",PT40S,hd,False,916576,8775,0,0,374
3,b8Nt2_EpgvY,Anubhuti kharadina ra || Khordha toka || Funny...,Experience the joy of trading on Binomo and ea...,2024-06-24T04:27:52Z,UCmuSPlchjLraieuEs33mcVA,khordha toka,23,Comedy,"['khordha toka', 'new khordha toka vdo', 'dale...",PT22M45S,hd,False,206620,26823,0,0,1033
4,SFiZRQZII1g,Living 24 Hours In Space Capsule : Challenge 🤯,SUBSCRIBE: https://youtube.com/MRINDIANHACKER?...,2024-06-22T10:28:03Z,UCSiDGb0MnHFGjs4E2WKvShw,MR. INDIAN HACKER,28,Science & Technology,[],PT27M29S,hd,False,3326409,332322,0,0,10153


In [8]:
# Load the saved US (United States) trending-videos CSV into a DataFrame and preview its first rows
trending_videos_US = pd.read_csv('trending_videos_US.csv')
trending_videos_US.head()

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,definition,caption,view_count,like_count,dislike_count,favorite_count,comment_count
0,E_sVLtTODRk,Chicago Sky vs. Indiana Fever | FULL GAME HIGH...,The Chicago Sky and Indiana Fever competed in ...,2024-06-23T22:31:54Z,UCO9a_ryN_l7DIDS-VIt-zmw,WNBA,17,Sports,"['wnba', 'nba', 'basketball', 'women', 'highli...",PT9M30S,hd,False,361392,5377,0,0,2173
1,U0Mq3mJdwh4,MEGA BOXES ARE BACK!!!,This episode of Brawl Talk brings back the mos...,2024-06-22T15:00:18Z,UCooVYzDxdwTtGYAkcPmOgOw,Brawl Stars,20,Gaming,"['brawl stars', 'supercell game', 'megabox', '...",PT8M53S,hd,True,25738337,1306404,0,0,165530
2,qReN9SFd35o,SHA'CARRI TO PARIS: Richardson SCORCHES 100m T...,"Sha'Carri Richardson, the world's fastest woma...",2024-06-23T03:38:48Z,UCqZQlzSHbVJrwrn5XvzrzcA,NBC Sports,17,Sports,"['olympics', 'nbc sports', 'track and field', ...",PT6M56S,hd,False,1137902,24349,0,0,2348
3,0psrAMafXVg,AMP OPENS A PAWN SHOP,Get AMP streetwear https://amp.shop ⚡️⚡️\n\nSp...,2024-06-22T19:37:39Z,UCJbYdyufHR-cxOuY96KIoqA,AMP,24,Entertainment,"['AMP PAWN SHOP', 'AMP PAWN STARS', 'AMP RUNS ...",PT34M50S,hd,False,1577353,65563,0,0,2444
4,wtA8ufYCM5c,House of the Dragon S2E01 Explained,Get Nebula using my link for 40% off an annual...,2024-06-23T14:16:22Z,UCveZqqGewoyPiacooywP5Ig,Alt Shift X,27,Education,[],PT43M10S,hd,True,406877,18594,0,0,1413


In [9]:
# Function to check the missing values in the dataset
def missing_values(df):
    """Summarise the columns of `df` that contain at least one missing value.

    Returns a DataFrame indexed by column name with the number of missing
    entries ('Missing Values') and their share of all rows in percent
    ('Percentage'). Columns without missing values are left out.
    """
    null_counts = df.isna().sum()
    null_counts = null_counts.loc[null_counts.gt(0)]
    row_total = len(df)
    null_share = null_counts.div(row_total).mul(100)
    summary = pd.DataFrame({'Missing Values': null_counts, 'Percentage': null_share})
    return summary

In [10]:
# Function to check the data types of the columns
def data_types(df):
    """Return the dtype of every column of `df` as a Series indexed by column name."""
    column_dtypes = df.dtypes
    return column_dtypes

In [11]:
# Print a header, then display (as a tuple) the missing-value summary and the column dtypes for the AU (Australia) region
print("Missing Values AU(Australia) Region")
missing_values(trending_videos_AU), data_types(trending_videos_AU)

Missing Values AU(Australia) Region


(             Missing Values  Percentage
 description               1         0.5,
 video_id          object
 title             object
 description       object
 published_at      object
 channel_id        object
 channel_title     object
 category_id        int64
 category_name     object
 tags              object
 duration          object
 definition        object
 caption             bool
 view_count         int64
 like_count         int64
 dislike_count      int64
 favorite_count     int64
 comment_count      int64
 dtype: object)

In [12]:
# Print a header, then display (as a tuple) the missing-value summary and the column dtypes for the CA (Canada) region
print("Missing Values CA(Canada) Region")
missing_values(trending_videos_CA), data_types(trending_videos_CA)

Missing Values CA(Canada) Region


(             Missing Values  Percentage
 description               1         0.5,
 video_id          object
 title             object
 description       object
 published_at      object
 channel_id        object
 channel_title     object
 category_id        int64
 category_name     object
 tags              object
 duration          object
 definition        object
 caption             bool
 view_count         int64
 like_count         int64
 dislike_count      int64
 favorite_count     int64
 comment_count      int64
 dtype: object)

In [13]:
# Print a header, then display (as a tuple) the missing-value summary and the column dtypes for the GB (United Kingdom) region
print("Missing Values GB(United Kingdom) Region")
missing_values(trending_videos_GB), data_types(trending_videos_GB)

Missing Values GB(United Kingdom) Region


(             Missing Values  Percentage
 description               1         0.5,
 video_id          object
 title             object
 description       object
 published_at      object
 channel_id        object
 channel_title     object
 category_id        int64
 category_name     object
 tags              object
 duration          object
 definition        object
 caption             bool
 view_count         int64
 like_count         int64
 dislike_count      int64
 favorite_count     int64
 comment_count      int64
 dtype: object)

In [14]:
# Print a header, then display (as a tuple) the missing-value summary and the column dtypes for the IN (India) region
print("Missing Values IN(India) Region")
missing_values(trending_videos_IN), data_types(trending_videos_IN)

Missing Values IN(India) Region


(             Missing Values  Percentage
 description               4    2.919708,
 video_id          object
 title             object
 description       object
 published_at      object
 channel_id        object
 channel_title     object
 category_id        int64
 category_name     object
 tags              object
 duration          object
 definition        object
 caption             bool
 view_count         int64
 like_count         int64
 dislike_count      int64
 favorite_count     int64
 comment_count      int64
 dtype: object)

In [15]:
# Print a header, then display (as a tuple) the missing-value summary and the column dtypes for the US (United States) region
print("Missing Values US(United States) Region")
missing_values(trending_videos_US), data_types(trending_videos_US)

Missing Values US(United States) Region


(             Missing Values  Percentage
 description               1         0.5,
 video_id          object
 title             object
 description       object
 published_at      object
 channel_id        object
 channel_title     object
 category_id        int64
 category_name     object
 tags              object
 duration          object
 definition        object
 caption             bool
 view_count         int64
 like_count         int64
 dislike_count      int64
 favorite_count     int64
 comment_count      int64
 dtype: object)

In [16]:
# Function to replace missing descriptions with a placeholder text
def fill_na(df):
    """Fill missing `description` values with 'Description Blank'.

    The frame is modified in place and the same object is returned; other
    columns are left untouched.
    """
    placeholders = {'description': 'Description Blank'}
    df.fillna(value=placeholders, inplace=True)
    return df

In [17]:
# Replace missing values in the description column of every regional DataFrame with 'Description Blank' (via fill_na)
trending_videos_AU = fill_na(trending_videos_AU)
trending_videos_CA = fill_na(trending_videos_CA)
trending_videos_GB = fill_na(trending_videos_GB)
trending_videos_IN = fill_na(trending_videos_IN)
trending_videos_US = fill_na(trending_videos_US)
print("Missing Values Filled Successfully")

Missing Values Filled Successfully


In [18]:
# Function to convert published_at column to datetime
def convert_published_at(df):
    """Parse the `published_at` column of `df` with `pd.to_datetime`.

    The column is overwritten on the passed frame, which is also returned.
    """
    parsed_timestamps = pd.to_datetime(df['published_at'])
    df['published_at'] = parsed_timestamps
    return df

In [19]:
# Convert the published_at column of every regional DataFrame to datetime (via convert_published_at)
trending_videos_AU = convert_published_at(trending_videos_AU)
trending_videos_CA = convert_published_at(trending_videos_CA)
trending_videos_GB = convert_published_at(trending_videos_GB)
trending_videos_IN = convert_published_at(trending_videos_IN)
trending_videos_US = convert_published_at(trending_videos_US)
print("Published At Column Converted to Datetime Successfully")

Published At Column Converted to Datetime Successfully


In [20]:
# Function to convert tags column from str to list
def convert_tags(df):
    """Turn the string form of each `tags` entry (e.g. "['a', 'b']") back into a list.

    Entries that are not strings (already-converted lists) are kept as they
    are, so calling the function again is harmless. The column is overwritten
    on the passed frame, which is also returned.

    Raises:
        ValueError / SyntaxError: If a string entry is not a valid Python literal.
    """
    # The tag text comes from the saved CSV, i.e. ultimately from video uploaders.
    # ast.literal_eval only accepts Python literals, so a crafted value cannot
    # execute code the way eval() would.
    df['tags'] = df['tags'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
    return df

In [21]:
# Convert the tags column of every regional DataFrame from its string form to a list (via convert_tags)
trending_videos_AU = convert_tags(trending_videos_AU)
trending_videos_CA = convert_tags(trending_videos_CA)
trending_videos_GB = convert_tags(trending_videos_GB)
trending_videos_IN = convert_tags(trending_videos_IN)
trending_videos_US = convert_tags(trending_videos_US)
print("Tags Column Converted to List Successfully")

Tags Column Converted to List Successfully


In [22]:
# Function to convert the ISO 8601 duration strings (e.g. 'PT1M3S') to seconds and change the column type to int
def convert_date(df):
    """Convert the `duration` column from ISO 8601 duration strings to integer seconds.

    Despite its name, the function works on durations, not dates. Values that
    are no longer strings are treated as already converted and kept, so
    re-running the notebook cell does not fail. The column is overwritten on
    the passed frame, which is also returned.
    """
    def _to_seconds(value):
        # Only strings still need parsing; anything else was converted by an earlier run
        if isinstance(value, str):
            return isodate.parse_duration(value).total_seconds()
        return value

    df['duration'] = df['duration'].apply(_to_seconds).astype(int)
    return df

In [23]:
# Convert the duration column of every regional DataFrame to integer seconds (via convert_date)
trending_videos_AU = convert_date(trending_videos_AU)
trending_videos_CA = convert_date(trending_videos_CA)
trending_videos_GB = convert_date(trending_videos_GB)
trending_videos_IN = convert_date(trending_videos_IN)
trending_videos_US = convert_date(trending_videos_US)
print("Date Column Converted Successfully")

Date Column Converted Successfully


In [24]:
# Function to get the max duration of the videos
def get_max_duration(df):
    """Return the largest value found in the `duration` column of `df`."""
    durations = df['duration']
    return durations.max()

# Compute the maximum value of the duration column for each region:
# AU(Australia), CA(Canada), GB(United Kingdom), IN(India), US(United States)
max_duration_AU = get_max_duration(trending_videos_AU)
max_duration_CA = get_max_duration(trending_videos_CA)
max_duration_GB = get_max_duration(trending_videos_GB)
max_duration_IN = get_max_duration(trending_videos_IN)
max_duration_US = get_max_duration(trending_videos_US)
# Print one line per region with its maximum duration
print("Max AU Region",max_duration_AU)
print("Max CA Region",max_duration_CA)
print("Max GB Region",max_duration_GB)
print("Max IN Region",max_duration_IN)
print("Max US Region",max_duration_US)

Max AU Region 4417
Max CA Region 4417
Max GB Region 4417
Max IN Region 5289
Max US Region 4417


In [25]:
# Function to add duration range column
def add_duration_range_column(df):
    """Add a categorical `duration_range` column with 5-minute buckets ('0-5 min', '5-10 min', ...).

    `duration` is expected in seconds. The buckets cover 0-100 minutes by
    default, which yields the labels '0-5 min' through '95-100 min'. When the
    frame contains a longer video, further 5-minute buckets are appended so the
    value is labelled instead of becoming NaN. A duration of exactly 0 is
    placed in the first bucket. The column is written to the passed frame,
    which is also returned.
    """
    bin_width = 300    # bucket size in seconds (5 minutes)
    upper_edge = 6000  # default coverage in seconds (100 minutes)

    longest = df['duration'].max()
    if pd.notna(longest) and longest > upper_edge:
        # Round the longest duration up to the next bucket boundary (ceiling division)
        upper_edge = int(-(-longest // bin_width) * bin_width)

    bins = list(range(0, upper_edge + bin_width, bin_width))
    labels = [f"{low // 60}-{high // 60} min" for low, high in zip(bins[:-1], bins[1:])]

    # include_lowest=True keeps 0-second durations inside the first (right-closed) bucket
    df['duration_range'] = pd.cut(df['duration'], bins=bins, labels=labels, include_lowest=True)
    return df

In [26]:
# Add the duration_range column to every regional DataFrame (via add_duration_range_column)
trending_videos_AU= add_duration_range_column(trending_videos_AU)
trending_videos_CA = add_duration_range_column(trending_videos_CA)
trending_videos_GB = add_duration_range_column(trending_videos_GB)
trending_videos_IN = add_duration_range_column(trending_videos_IN)
trending_videos_US = add_duration_range_column(trending_videos_US)
print("Duration Range Column Added Successfully")

Duration Range Column Added Successfully


In [27]:
# Preview the first rows of the processed AU (Australia) DataFrame
trending_videos_AU.head()

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,definition,caption,view_count,like_count,dislike_count,favorite_count,comment_count,duration_range
0,wxZP0bdXYUU,Robert Whittaker knocks out Ikram Aliskerov in...,Watch as Robert Whittaker knocks out Ikram Ali...,2024-06-22 21:44:48+00:00,UCO4AcsPKEkIqDmbeiZLfd1A,ESPN MMA,17,Sports,"[Alexander Volkov, Sergey Pavlovich, robert wh...",63,hd,False,271432,5097,0,0,904,0-5 min
1,Z7B7PpTOpDE,Aussie Tries American Fast Food for the First ...,Join me as I try American fast food for the fi...,2024-06-23 07:30:15+00:00,UCecAIXPb5KTJz5BFnUzlTaA,Spanian,24,Entertainment,"[Spanian, action bronson, american, best bites...",2438,hd,False,358864,11605,0,0,2267,40-45 min
2,bmzFk5-TT3w,Hiring a DJ for a Bikies Funeral PRANK,Thanks to Danny Rant's & Billboard for helping...,2024-06-22 21:00:24+00:00,UCEpHkpv4_CgZIEadjjOv4jA,Misfit Minds,22,People & Blogs,[],764,hd,False,149033,8950,0,0,395,10-15 min
3,Sfpr_S8nVLA,I made the worlds most powerful soccer shoe,Get a free 14-day trial of Odoo's all-in-one b...,2024-06-22 15:00:22+00:00,UCJLZe_NoiG0hT7QCX_9vmqw,I did a thing,24,Entertainment,[],1290,hd,False,2093348,116859,0,0,4269,20-25 min
4,U0Mq3mJdwh4,MEGA BOXES ARE BACK!!!,This episode of Brawl Talk brings back the mos...,2024-06-22 15:00:18+00:00,UCooVYzDxdwTtGYAkcPmOgOw,Brawl Stars,20,Gaming,"[brawl stars, supercell game, megabox, mega, b...",533,hd,True,25738337,1306402,0,0,165529,5-10 min


In [28]:
# Show the column dtypes of the processed AU (Australia) DataFrame
data_types(trending_videos_AU)

video_id                       object
title                          object
description                    object
published_at      datetime64[ns, UTC]
channel_id                     object
channel_title                  object
category_id                     int64
category_name                  object
tags                           object
duration                        int64
definition                     object
caption                          bool
view_count                      int64
like_count                      int64
dislike_count                   int64
favorite_count                  int64
comment_count                   int64
duration_range               category
dtype: object

In [29]:
# Preview the first rows of the processed CA (Canada) DataFrame
trending_videos_CA.head()

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,definition,caption,view_count,like_count,dislike_count,favorite_count,comment_count,duration_range
0,U0Mq3mJdwh4,MEGA BOXES ARE BACK!!!,This episode of Brawl Talk brings back the mos...,2024-06-22 15:00:18+00:00,UCooVYzDxdwTtGYAkcPmOgOw,Brawl Stars,20,Gaming,"[brawl stars, supercell game, megabox, mega, b...",533,hd,True,25738337,1306402,0,0,165530,5-10 min
1,yOS-bkcpsXo,Buffs & Nerfs | Mega Box Rates | Skin Animatio...,"Update Info! Balance Changes, Mega Box Rates, ...",2024-06-23 15:00:50+00:00,UCmG2EhfOwSjpPMX4LjGY__A,KairosTime Gaming,20,Gaming,"[Brawl Stars, BrawlStars, Kairos, KairosTime, ...",1403,hd,False,611218,26501,0,0,3367,20-25 min
2,wtA8ufYCM5c,House of the Dragon S2E01 Explained,Get Nebula using my link for 40% off an annual...,2024-06-23 14:16:22+00:00,UCveZqqGewoyPiacooywP5Ig,Alt Shift X,27,Education,[],2590,hd,True,406877,18594,0,0,1413,40-45 min
3,A4ZbdU31KbQ,skibidi toilet 75,"new enemies, new friends\n\nfull-screen versio...",2024-06-24 00:00:04+00:00,UCsSsgPaZ2GSmO6il8Cb5iGA,DaFuq!?Boom!,24,Entertainment,"[sfm animation, skibidi toilet vs cameramen, s...",232,hd,False,7950444,653465,0,0,53764,0-5 min
4,E_sVLtTODRk,Chicago Sky vs. Indiana Fever | FULL GAME HIGH...,The Chicago Sky and Indiana Fever competed in ...,2024-06-23 22:31:54+00:00,UCO9a_ryN_l7DIDS-VIt-zmw,WNBA,17,Sports,"[wnba, nba, basketball, women, highlights, ama...",570,hd,False,361392,5377,0,0,2173,5-10 min


In [30]:
# Show the column dtypes of the processed CA (Canada) DataFrame
data_types(trending_videos_CA)

video_id                       object
title                          object
description                    object
published_at      datetime64[ns, UTC]
channel_id                     object
channel_title                  object
category_id                     int64
category_name                  object
tags                           object
duration                        int64
definition                     object
caption                          bool
view_count                      int64
like_count                      int64
dislike_count                   int64
favorite_count                  int64
comment_count                   int64
duration_range               category
dtype: object

In [31]:
# Preview the first rows of the processed GB (United Kingdom) DataFrame
trending_videos_GB.head()

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,definition,caption,view_count,like_count,dislike_count,favorite_count,comment_count,duration_range
0,e3HpmhBlKNQ,Mazza L20 - Fire in the Booth,"Mazza L20 is in the studio in London, UK for h...",2024-06-23 15:00:08+00:00,UCB5-9DHv6C6WMJOksBocwJQ,CharlieSloth,10,Music,"[charlie sloth, apple music, fire in the booth...",655,hd,False,233487,14097,0,0,1920,10-15 min
1,BhwpIUm34n4,GUESS THE MUSICIAN!,Thank you to Azar for sponsoring this video! \...,2024-06-23 16:27:41+00:00,UCWZmCMB7mmKWcXJSIPRhzZw,Miniminter,24,Entertainment,"[simon, sidemen, miniminter, mm7games, random,...",1897,hd,False,396409,22628,0,0,676,30-35 min
2,3nB688xBYdY,There are NOT 195 countries,Go to https://ground.news/mapmen to get the wo...,2024-06-23 14:59:38+00:00,UCbbQalJ4OaC0oQ0AqRaOJ9g,Jay Foreman,23,Comedy,[],736,hd,True,598256,53104,0,0,3425,10-15 min
3,wtA8ufYCM5c,House of the Dragon S2E01 Explained,Get Nebula using my link for 40% off an annual...,2024-06-23 14:16:22+00:00,UCveZqqGewoyPiacooywP5Ig,Alt Shift X,27,Education,[],2590,hd,True,406877,18594,0,0,1413,40-45 min
4,U0Mq3mJdwh4,MEGA BOXES ARE BACK!!!,This episode of Brawl Talk brings back the mos...,2024-06-22 15:00:18+00:00,UCooVYzDxdwTtGYAkcPmOgOw,Brawl Stars,20,Gaming,"[brawl stars, supercell game, megabox, mega, b...",533,hd,True,25738337,1306404,0,0,165530,5-10 min


In [32]:
# Show the column dtypes of the processed GB (United Kingdom) DataFrame
data_types(trending_videos_GB)

video_id                       object
title                          object
description                    object
published_at      datetime64[ns, UTC]
channel_id                     object
channel_title                  object
category_id                     int64
category_name                  object
tags                           object
duration                        int64
definition                     object
caption                          bool
view_count                      int64
like_count                      int64
dislike_count                   int64
favorite_count                  int64
comment_count                   int64
duration_range               category
dtype: object

In [33]:
# Preview the first rows of the processed IN (India) DataFrame
trending_videos_IN.head()

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,definition,caption,view_count,like_count,dislike_count,favorite_count,comment_count,duration_range
0,nJDclWEjGPA,#TheGOATBdayShots | Thalapathy Vijay | Venkat ...,A special GOATbdayshots from the film The Grea...,2024-06-21 18:31:00+00:00,UC9WXzTgk10ncJX1eOxHElCg,AGS Entertainment,1,Film & Animation,"[Thalapathy Vijay, Thalapathy Vijay birthday, ...",51,hd,False,8669183,516527,0,0,12222,0-5 min
1,4P_k0rqmyX8,Chinna Chinna Kangal (Lyrical) | The Greatest ...,Get ready to melt in this soul stirring melody...,2024-06-22 12:02:01+00:00,UCq-Fj5jknLsUf-MWSy4_brA,T-Series,10,Music,"[hindi songs 2024, hindi songs new, bollywood ...",282,hd,False,5957163,567811,0,0,19696,0-5 min
2,G4u0jS6SRTw,Armaan को पड़ा Kritika और Payal पर Comment मारन...,📲 Download JioCinema: https://bit.ly/3Be09Z3 \...,2024-06-23 13:30:11+00:00,UC8To9CFsZzvPafxMLzS08iA,JioCinema,24,Entertainment,"[bigg boss ott 3, bigg boss ott 3 contestants,...",40,hd,False,916576,8775,0,0,374,0-5 min
3,b8Nt2_EpgvY,Anubhuti kharadina ra || Khordha toka || Funny...,Experience the joy of trading on Binomo and ea...,2024-06-24 04:27:52+00:00,UCmuSPlchjLraieuEs33mcVA,khordha toka,23,Comedy,"[khordha toka, new khordha toka vdo, dalema vd...",1365,hd,False,206620,26823,0,0,1033,20-25 min
4,SFiZRQZII1g,Living 24 Hours In Space Capsule : Challenge 🤯,SUBSCRIBE: https://youtube.com/MRINDIANHACKER?...,2024-06-22 10:28:03+00:00,UCSiDGb0MnHFGjs4E2WKvShw,MR. INDIAN HACKER,28,Science & Technology,[],1649,hd,False,3326409,332322,0,0,10153,25-30 min


In [34]:
# Show the column dtypes of the processed IN (India) DataFrame
data_types(trending_videos_IN)

video_id                       object
title                          object
description                    object
published_at      datetime64[ns, UTC]
channel_id                     object
channel_title                  object
category_id                     int64
category_name                  object
tags                           object
duration                        int64
definition                     object
caption                          bool
view_count                      int64
like_count                      int64
dislike_count                   int64
favorite_count                  int64
comment_count                   int64
duration_range               category
dtype: object

In [35]:
# Preview the first rows of the processed US (United States) DataFrame
trending_videos_US.head()

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,definition,caption,view_count,like_count,dislike_count,favorite_count,comment_count,duration_range
0,E_sVLtTODRk,Chicago Sky vs. Indiana Fever | FULL GAME HIGH...,The Chicago Sky and Indiana Fever competed in ...,2024-06-23 22:31:54+00:00,UCO9a_ryN_l7DIDS-VIt-zmw,WNBA,17,Sports,"[wnba, nba, basketball, women, highlights, ama...",570,hd,False,361392,5377,0,0,2173,5-10 min
1,U0Mq3mJdwh4,MEGA BOXES ARE BACK!!!,This episode of Brawl Talk brings back the mos...,2024-06-22 15:00:18+00:00,UCooVYzDxdwTtGYAkcPmOgOw,Brawl Stars,20,Gaming,"[brawl stars, supercell game, megabox, mega, b...",533,hd,True,25738337,1306404,0,0,165530,5-10 min
2,qReN9SFd35o,SHA'CARRI TO PARIS: Richardson SCORCHES 100m T...,"Sha'Carri Richardson, the world's fastest woma...",2024-06-23 03:38:48+00:00,UCqZQlzSHbVJrwrn5XvzrzcA,NBC Sports,17,Sports,"[olympics, nbc sports, track and field, shacar...",416,hd,False,1137902,24349,0,0,2348,5-10 min
3,0psrAMafXVg,AMP OPENS A PAWN SHOP,Get AMP streetwear https://amp.shop ⚡️⚡️\n\nSp...,2024-06-22 19:37:39+00:00,UCJbYdyufHR-cxOuY96KIoqA,AMP,24,Entertainment,"[AMP PAWN SHOP, AMP PAWN STARS, AMP RUNS A PAW...",2090,hd,False,1577353,65563,0,0,2444,30-35 min
4,wtA8ufYCM5c,House of the Dragon S2E01 Explained,Get Nebula using my link for 40% off an annual...,2024-06-23 14:16:22+00:00,UCveZqqGewoyPiacooywP5Ig,Alt Shift X,27,Education,[],2590,hd,True,406877,18594,0,0,1413,40-45 min


In [36]:
# Show the column dtypes of the processed US (United States) DataFrame
data_types(trending_videos_US)

video_id                       object
title                          object
description                    object
published_at      datetime64[ns, UTC]
channel_id                     object
channel_title                  object
category_id                     int64
category_name                  object
tags                           object
duration                        int64
definition                     object
caption                          bool
view_count                      int64
like_count                      int64
dislike_count                   int64
favorite_count                  int64
comment_count                   int64
duration_range               category
dtype: object

In [37]:
# Final marker cell: print a message indicating the preprocessing steps above have finished
print("Preprocessing Completed Successfully")

Preprocessing Completed Successfully
