In [1]:
# Importing Libraries
import ast
import csv
import os

import isodate
import pandas as pd
from googleapiclient.discovery import build
print("Libraries Imported Successfully")

Libraries Imported Successfully


In [2]:
# Youtube V3 API key
# Prefer the YOUTUBE_API_KEY environment variable so a real key never has to be saved
# inside the notebook; the placeholder below is used only when the variable is not set.
API_KEY = os.environ.get('YOUTUBE_API_KEY', 'Enter Your API Key Here')
print("API Key Set Successfully")

API Key Set Successfully


In [3]:
# Function to get the trending videos of 5 specific regions AU(Australia), CA(Canada), GB(United Kingdom), IN(India), US(United States) and saving them in CSV files
def get_trending_videos(api_key, region_codes=['AU', 'CA', 'GB', 'IN', 'US'], max_results=200):
    """Fetch the most popular videos for each region and save one CSV per region.

    For every region code the function requests the category id -> title mapping,
    then pages through ``videos().list(chart='mostPopular')`` (50 items per page)
    until ``max_results`` rows are collected or no further page is returned.
    The rows of each region are written to ``trending_videos_<region>.csv``.

    Args:
        api_key: Developer key handed to ``build('youtube', 'v3', ...)``.
        region_codes: Iterable of region codes to query. The default list is only
            read, never modified.
        max_results: Upper bound on the number of rows kept per region.

    Returns:
        dict mapping each region code to a list of per-video dicts (the same rows
        that were written to that region's CSV file).
    """
    # Column order of every CSV file. Declared up front so the header can always be
    # written, including for a region that yields no videos at all.
    fieldnames = [
        'video_id', 'title', 'description', 'published_at', 'channel_id',
        'channel_title', 'category_id', 'category_name', 'tags', 'duration',
        'view_count', 'like_count', 'comment_count',
    ]

    # Initialize an empty dictionary to store videos by region
    videos_by_region = {region: [] for region in region_codes}

    # Youtube API connection build object
    youtube = build('youtube', 'v3', developerKey=api_key)

    for youtube_region_code in region_codes:
        region_videos = videos_by_region[youtube_region_code]

        # Getting youtube categories Names
        categories_response = youtube.videoCategories().list(part='snippet', regionCode=youtube_region_code).execute()
        category_map = {item['id']: item['snippet']['title'] for item in categories_response.get('items', [])}

        # Fetching the most popular videos
        request = youtube.videos().list(
            part='snippet,contentDetails,statistics',
            chart='mostPopular',
            regionCode=youtube_region_code,
            maxResults=50
        )

        # Paginating through the results getting the details and storing them in the list
        while request and len(region_videos) < max_results:
            response = request.execute()
            for item in response.get('items', []):
                # A page may hold more items than are still needed; stop exactly at the cap.
                if len(region_videos) >= max_results:
                    break
                snippet = item['snippet']
                statistics = item['statistics']
                region_videos.append({
                    'video_id': item['id'],
                    'title': snippet['title'],
                    'description': snippet['description'],
                    'published_at': snippet['publishedAt'],
                    'channel_id': snippet['channelId'],
                    'channel_title': snippet['channelTitle'],
                    'category_id': snippet['categoryId'],
                    'category_name': category_map.get(snippet['categoryId'], 'Unknown'),
                    'tags': snippet.get('tags', []),
                    'duration': item['contentDetails']['duration'],
                    # Counters missing from the response are stored as 0.
                    'view_count': statistics.get('viewCount', 0),
                    'like_count': statistics.get('likeCount', 0),
                    'comment_count': statistics.get('commentCount', 0)
                })

            # Getting the next page token
            request = youtube.videos().list_next(request, response)

        # Write the data to a CSV file
        filename = f"trending_videos_{youtube_region_code}.csv"
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(region_videos)

    # Returning the videos by region
    return videos_by_region

# Fetch the trending videos for the default regions (also writes one CSV file per region)
trending_videos = get_trending_videos(api_key=API_KEY)
print("Trending Videos Fetched Successfully")

Trending Videos Fetched Successfully


In [4]:
# Load the saved CSV for the AU(Australia) region and preview its first five rows
trending_videos_AU = pd.read_csv(filepath_or_buffer='trending_videos_AU.csv')
trending_videos_AU.head(n=5)

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,view_count,like_count,comment_count
0,wxZP0bdXYUU,Robert Whittaker knocks out Ikram Aliskerov in...,Watch as Robert Whittaker knocks out Ikram Ali...,2024-06-22T21:44:48Z,UCO4AcsPKEkIqDmbeiZLfd1A,ESPN MMA,17,Sports,"['Alexander Volkov', 'Sergey Pavlovich', 'robe...",PT1M3S,289370,5306,933
1,Z7B7PpTOpDE,Aussie Tries American Fast Food for the First ...,Join me as I try American fast food for the fi...,2024-06-23T07:30:15Z,UCecAIXPb5KTJz5BFnUzlTaA,Spanian,24,Entertainment,"['Spanian', 'action bronson', 'american', 'bes...",PT40M38S,404728,12134,2375
2,_WY42gatzVY,3 Days Solo Boat Camping To Catch Food For My ...,"A wild boat camping trip in remote Australia, ...",2024-06-23T10:00:23Z,UCSnwORddxZG1SyB8lSxWQGg,YBS Youngbloods,17,Sports,[],PT28M28S,421999,15742,778
3,bmzFk5-TT3w,Hiring a DJ for a Bikies Funeral PRANK,Thanks to Danny Rant's & Billboard for helping...,2024-06-22T21:00:24Z,UCEpHkpv4_CgZIEadjjOv4jA,Misfit Minds,22,People & Blogs,[],PT12M44S,163324,9469,410
4,Sfpr_S8nVLA,I made the worlds most powerful soccer shoe,Get a free 14-day trial of Odoo's all-in-one b...,2024-06-22T15:00:22Z,UCJLZe_NoiG0hT7QCX_9vmqw,I did a thing,24,Entertainment,[],PT21M30S,2275736,125152,4457


In [5]:
# Load the saved CSV for the CA(Canada) region and preview its first five rows
trending_videos_CA = pd.read_csv(filepath_or_buffer='trending_videos_CA.csv')
trending_videos_CA.head(n=5)

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,view_count,like_count,comment_count
0,gTkMhlrIl-4,House of the Dragon Season 2 | Episode 3 Previ...,Reason will be forgotten. A new episode of the...,2024-06-24T02:00:27Z,UCx-KWLTKlB83hDI6UKECtJQ,Max,24,Entertainment,"['max', 'hbo max', 'hbo', 'max originals', 'ma...",PT1M6S,546950,12793,1407
1,FyG21rXCxlY,Red Velvet 레드벨벳 'Cosmic' MV,"Red Velvet's new album ""Cosmic"" is out!\nListe...",2024-06-24T09:44:02Z,UCEf_Bc-KVd7onSeifS3py9g,SMTOWN,10,Music,"['Red Velvet', '레드벨벳', '레벨', 'RV', '아이린', '웬디'...",PT3M50S,3242960,334848,19547
2,U0Mq3mJdwh4,MEGA BOXES ARE BACK!!!,This episode of Brawl Talk brings back the mos...,2024-06-22T15:00:18Z,UCooVYzDxdwTtGYAkcPmOgOw,Brawl Stars,20,Gaming,"['brawl stars', 'supercell game', 'megabox', '...",PT8M53S,26911535,1320553,166472
3,bcH7050gXc0,Six Days Camping in the Deep Wilderness - Catc...,https://mavoutdoors.com/\n\nBook a stay at my ...,2024-06-23T22:30:06Z,UCL_BZpt0J9Kqwy6YPWr30ow,Mav,24,Entertainment,"['Mav', 'Mavrikjoos', 'mavrik joos', 'Truck Ca...",PT59M49S,240585,10735,767
4,tAnC8Hvqhm8,Unlocking Unlimited Heroes | Community Event,Follow us on our Socials!\nTwitter ► https://t...,2024-06-24T07:58:00Z,UCD1Em4q90ZUK2R5HKesszJg,Clash of Clans,20,Gaming,"['clash of clans', 'COC', 'Clash of Clans Game...",PT1M47S,381907,15959,1497


In [6]:
# Load the saved CSV for the GB(United Kingdom) region and preview its first five rows
trending_videos_GB = pd.read_csv(filepath_or_buffer='trending_videos_GB.csv')
trending_videos_GB.head(n=5)

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,view_count,like_count,comment_count
0,e3HpmhBlKNQ,Mazza L20 - Fire in the Booth,"Mazza L20 is in the studio in London, UK for h...",2024-06-23T15:00:08Z,UCB5-9DHv6C6WMJOksBocwJQ,CharlieSloth,10,Music,"['charlie sloth', 'apple music', 'fire in the ...",PT10M55S,344777,17270,2314
1,BhwpIUm34n4,GUESS THE MUSICIAN!,Thank you to Azar for sponsoring this video! \...,2024-06-23T16:27:41Z,UCWZmCMB7mmKWcXJSIPRhzZw,Miniminter,24,Entertainment,"['simon', 'sidemen', 'miniminter', 'mm7games',...",PT31M37S,512136,27025,763
2,gTkMhlrIl-4,House of the Dragon Season 2 | Episode 3 Previ...,Reason will be forgotten. A new episode of the...,2024-06-24T02:00:27Z,UCx-KWLTKlB83hDI6UKECtJQ,Max,24,Entertainment,"['max', 'hbo max', 'hbo', 'max originals', 'ma...",PT1M6S,546950,12796,1407
3,FyG21rXCxlY,Red Velvet 레드벨벳 'Cosmic' MV,"Red Velvet's new album ""Cosmic"" is out!\nListe...",2024-06-24T09:44:02Z,UCEf_Bc-KVd7onSeifS3py9g,SMTOWN,10,Music,"['Red Velvet', '레드벨벳', '레벨', 'RV', '아이린', '웬디'...",PT3M50S,3242960,334848,19547
4,z8_rVvmC6t8,Here's why I sold my Lotus Emira and bought th...,I recently decided to sell my Lotus Emira and ...,2024-06-23T16:00:50Z,UCIB5XXHNAWWzTOw6guIMYCg,Harry's garage,2,Autos & Vehicles,[],PT22M35S,359128,20309,1690


In [7]:
# Load the saved CSV for the IN(India) region and preview its first five rows
trending_videos_IN = pd.read_csv(filepath_or_buffer='trending_videos_IN.csv')
trending_videos_IN.head(n=5)

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,view_count,like_count,comment_count
0,nJDclWEjGPA,#TheGOATBdayShots | Thalapathy Vijay | Venkat ...,A special GOATbdayshots from the film The Grea...,2024-06-21T18:31:00Z,UC9WXzTgk10ncJX1eOxHElCg,AGS Entertainment,1,Film & Animation,"['Thalapathy Vijay', 'Thalapathy Vijay birthda...",PT51S,8993996,524644,12375
1,4P_k0rqmyX8,Chinna Chinna Kangal (Lyrical) | The Greatest ...,Get ready to melt in this soul stirring melody...,2024-06-22T12:02:01Z,UCq-Fj5jknLsUf-MWSy4_brA,T-Series,10,Music,"['hindi songs 2024', 'hindi songs new', 'bolly...",PT4M42S,6418847,578522,19950
2,b8Nt2_EpgvY,Anubhuti kharadina ra || Khordha toka || Funny...,Experience the joy of trading on Binomo and ea...,2024-06-24T04:27:52Z,UCmuSPlchjLraieuEs33mcVA,khordha toka,23,Comedy,"['khordha toka', 'new khordha toka vdo', 'dale...",PT22M45S,413297,38097,1301
3,G4u0jS6SRTw,Armaan को पड़ा Kritika और Payal पर Comment मारन...,📲 Download JioCinema: https://bit.ly/3Be09Z3 \...,2024-06-23T13:30:11Z,UC8To9CFsZzvPafxMLzS08iA,JioCinema,24,Entertainment,"['bigg boss ott 3', 'bigg boss ott 3 contestan...",PT40S,1236788,10287,561
4,4N9N7tyR3gQ,ASKING MONEY FROM S8UL CREATORS,THANKS FOR WATCHING THE VIDEO. DON'T FORGET TO...,2024-06-24T04:30:03Z,UCPzIOAKAvphrnIfa9wFj69w,8bit MAMBA,24,Entertainment,"['8bit', '8bit mamba', 'mamba']",PT19M28S,542233,82977,2181


In [8]:
# Load the saved CSV for the US(United States) region and preview its first five rows
trending_videos_US = pd.read_csv(filepath_or_buffer='trending_videos_US.csv')
trending_videos_US.head(n=5)

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,view_count,like_count,comment_count
0,E_sVLtTODRk,Chicago Sky vs. Indiana Fever | FULL GAME HIGH...,The Chicago Sky and Indiana Fever competed in ...,2024-06-23T22:31:54Z,UCO9a_ryN_l7DIDS-VIt-zmw,WNBA,17,Sports,"['wnba', 'nba', 'basketball', 'women', 'highli...",PT9M30S,513456,6506,2588
1,FyG21rXCxlY,Red Velvet 레드벨벳 'Cosmic' MV,"Red Velvet's new album ""Cosmic"" is out!\nListe...",2024-06-24T09:44:02Z,UCEf_Bc-KVd7onSeifS3py9g,SMTOWN,10,Music,"['Red Velvet', '레드벨벳', '레벨', 'RV', '아이린', '웬디'...",PT3M50S,3242960,334863,19547
2,qReN9SFd35o,SHA'CARRI TO PARIS: Richardson SCORCHES 100m T...,"Sha'Carri Richardson, the world's fastest woma...",2024-06-23T03:38:48Z,UCqZQlzSHbVJrwrn5XvzrzcA,NBC Sports,17,Sports,"['olympics', 'nbc sports', 'track and field', ...",PT6M56S,1260219,26439,2539
3,U0Mq3mJdwh4,MEGA BOXES ARE BACK!!!,This episode of Brawl Talk brings back the mos...,2024-06-22T15:00:18Z,UCooVYzDxdwTtGYAkcPmOgOw,Brawl Stars,20,Gaming,"['brawl stars', 'supercell game', 'megabox', '...",PT8M53S,26911535,1320553,166472
4,bcH7050gXc0,Six Days Camping in the Deep Wilderness - Catc...,https://mavoutdoors.com/\n\nBook a stay at my ...,2024-06-23T22:30:06Z,UCL_BZpt0J9Kqwy6YPWr30ow,Mav,24,Entertainment,"['Mav', 'Mavrikjoos', 'mavrik joos', 'Truck Ca...",PT59M49S,240585,10735,767


In [9]:
# Function to check the missing values in the dataset
def missing_values(df):
    """Summarise the columns of ``df`` that contain missing values.

    Returns a DataFrame indexed by column name with two columns:
    'Missing Values' (count of nulls) and 'Percentage' (share of rows, 0-100).
    Columns without any null are left out, so a complete frame gives an empty result.
    """
    null_counts = df.isna().sum()
    # Keep only the columns that actually have at least one null.
    null_counts = null_counts.loc[null_counts.gt(0)]
    null_share = null_counts.div(len(df)).mul(100)
    return pd.DataFrame({'Missing Values': null_counts, 'Percentage': null_share})

In [10]:
# Function to check the data types of the columns
def data_types(df):
    """Return the dtype of every column of ``df`` as a Series indexed by column name."""
    column_types = df.dtypes
    return column_types

In [11]:
# Show the missing-value summary together with the column dtypes for the AU(Australia) region
print("Missing Values AU(Australia) Region")
(missing_values(df=trending_videos_AU), data_types(df=trending_videos_AU))

Missing Values AU(Australia) Region


(Empty DataFrame
 Columns: [Missing Values, Percentage]
 Index: [],
 video_id         object
 title            object
 description      object
 published_at     object
 channel_id       object
 channel_title    object
 category_id       int64
 category_name    object
 tags             object
 duration         object
 view_count        int64
 like_count        int64
 comment_count     int64
 dtype: object)

In [12]:
# Show the missing-value summary together with the column dtypes for the CA(Canada) region
print("Missing Values CA(Canada) Region")
(missing_values(df=trending_videos_CA), data_types(df=trending_videos_CA))

Missing Values CA(Canada) Region


(Empty DataFrame
 Columns: [Missing Values, Percentage]
 Index: [],
 video_id         object
 title            object
 description      object
 published_at     object
 channel_id       object
 channel_title    object
 category_id       int64
 category_name    object
 tags             object
 duration         object
 view_count        int64
 like_count        int64
 comment_count     int64
 dtype: object)

In [13]:
# Show the missing-value summary together with the column dtypes for the GB(United Kingdom) region
print("Missing Values GB(United Kingdom) Region")
(missing_values(df=trending_videos_GB), data_types(df=trending_videos_GB))

Missing Values GB(United Kingdom) Region


(Empty DataFrame
 Columns: [Missing Values, Percentage]
 Index: [],
 video_id         object
 title            object
 description      object
 published_at     object
 channel_id       object
 channel_title    object
 category_id       int64
 category_name    object
 tags             object
 duration         object
 view_count        int64
 like_count        int64
 comment_count     int64
 dtype: object)

In [14]:
# Show the missing-value summary together with the column dtypes for the IN(India) region
print("Missing Values IN(India) Region")
(missing_values(df=trending_videos_IN), data_types(df=trending_videos_IN))

Missing Values IN(India) Region


(             Missing Values  Percentage
 description               4    3.076923,
 video_id         object
 title            object
 description      object
 published_at     object
 channel_id       object
 channel_title    object
 category_id       int64
 category_name    object
 tags             object
 duration         object
 view_count        int64
 like_count        int64
 comment_count     int64
 dtype: object)

In [15]:
# Show the missing-value summary together with the column dtypes for the US(United States) region
print("Missing Values US(United States) Region")
(missing_values(df=trending_videos_US), data_types(df=trending_videos_US))

Missing Values US(United States) Region


(Empty DataFrame
 Columns: [Missing Values, Percentage]
 Index: [],
 video_id         object
 title            object
 description      object
 published_at     object
 channel_id       object
 channel_title    object
 category_id       int64
 category_name    object
 tags             object
 duration         object
 view_count        int64
 like_count        int64
 comment_count     int64
 dtype: object)

In [16]:
# Function to fill the missing values in the dataset
def fill_na(df):
    """Replace nulls in the 'description' column with 'Description Blank'.

    The frame is modified in place and the same object is returned; other
    columns are left untouched.
    """
    replacements = {'description': 'Description Blank'}
    df.fillna(value=replacements, inplace=True)
    return df

In [17]:
# Fill empty descriptions with "Description Blank" in every regional dataset
(
    trending_videos_AU,
    trending_videos_CA,
    trending_videos_GB,
    trending_videos_IN,
    trending_videos_US,
) = (
    fill_na(frame)
    for frame in (
        trending_videos_AU,
        trending_videos_CA,
        trending_videos_GB,
        trending_videos_IN,
        trending_videos_US,
    )
)
print("Missing Values Filled Successfully")

Missing Values Filled Successfully


In [18]:
# Function to convert published_at column to datetime
def convert_published_at(df):
    """Parse the 'published_at' column with ``pd.to_datetime`` and store the result back.

    The frame is modified in place and the same object is returned.
    """
    parsed_timestamps = pd.to_datetime(df['published_at'])
    df['published_at'] = parsed_timestamps
    return df

In [19]:
# Parse the published_at column into datetimes for every regional dataset
(
    trending_videos_AU,
    trending_videos_CA,
    trending_videos_GB,
    trending_videos_IN,
    trending_videos_US,
) = (
    convert_published_at(frame)
    for frame in (
        trending_videos_AU,
        trending_videos_CA,
        trending_videos_GB,
        trending_videos_IN,
        trending_videos_US,
    )
)
print("Published At Column Converted to Datetime Successfully")

Published At Column Converted to Datetime Successfully


In [20]:
# Function to convert tags column from str to list
def convert_tags(df):
    """Turn the textual list representation in the 'tags' column back into lists.

    String cells are parsed with ``ast.literal_eval``, which accepts Python literals
    only. This replaces ``eval``, which would execute any expression stored in the
    CSV file. Cells that are not strings are kept unchanged, so calling the
    function again on converted data is harmless.

    The frame is modified in place and the same object is returned.

    Raises:
        ValueError or SyntaxError: if a string cell is not a valid Python literal.
    """
    df['tags'] = df['tags'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
    return df

In [21]:
# Turn the tags column from text back into lists for every regional dataset
(
    trending_videos_AU,
    trending_videos_CA,
    trending_videos_GB,
    trending_videos_IN,
    trending_videos_US,
) = (
    convert_tags(frame)
    for frame in (
        trending_videos_AU,
        trending_videos_CA,
        trending_videos_GB,
        trending_videos_IN,
        trending_videos_US,
    )
)
print("Tags Column Converted to List Successfully")

Tags Column Converted to List Successfully


In [22]:
# function to convert duration to seconds and its type to int
def convert_duration(df):
    """Convert the 'duration' column to whole seconds stored as int.

    Text cells are parsed with ``isodate.parse_duration`` and reduced with
    ``total_seconds()``. When the column is already numeric (for example after this
    function ran once, or after reading a CSV that was saved with converted
    durations) the parsing step is skipped, so re-running the cell does not fail.

    The frame is modified in place and the same object is returned.
    """
    durations = df['duration']
    if not pd.api.types.is_numeric_dtype(durations):
        durations = durations.apply(lambda x: isodate.parse_duration(x).total_seconds())
    df['duration'] = durations.astype(int)
    return df

In [23]:
# Convert the duration column to integer seconds for every regional dataset
(
    trending_videos_AU,
    trending_videos_CA,
    trending_videos_GB,
    trending_videos_IN,
    trending_videos_US,
) = (
    convert_duration(frame)
    for frame in (
        trending_videos_AU,
        trending_videos_CA,
        trending_videos_GB,
        trending_videos_IN,
        trending_videos_US,
    )
)
print("Duration Column Converted Successfully")

Duration Column Converted Successfully


In [24]:
# Function to get the max duration of the videos
def get_max_duration(df):
    """Return the largest value found in the 'duration' column of ``df``."""
    durations = df['duration']
    return durations.max()

# Longest video duration for the AU(Australia), CA(Canada), GB(United Kingdom), IN(India) and US(United States) regions
(
    max_duration_AU,
    max_duration_CA,
    max_duration_GB,
    max_duration_IN,
    max_duration_US,
) = (
    get_max_duration(frame)
    for frame in (
        trending_videos_AU,
        trending_videos_CA,
        trending_videos_GB,
        trending_videos_IN,
        trending_videos_US,
    )
)
print("Max AU Region", max_duration_AU)
print("Max CA Region", max_duration_CA)
print("Max GB Region", max_duration_GB)
print("Max IN Region", max_duration_IN)
print("Max US Region", max_duration_US)

Max AU Region 4417
Max CA Region 4417
Max GB Region 4417
Max IN Region 3583
Max US Region 4417


In [25]:
# Function to add duration range column
def add_duration_range_column(df):
    """Add a categorical 'duration_range' column with 5-minute buckets.

    Buckets are labelled '0-5 min', '5-10 min', ... and are closed on the right,
    so a duration of exactly 300 falls into '0-5 min'. At least the twenty buckets
    up to '95-100 min' are always created; more are appended when the longest
    duration in ``df`` exceeds 6000, so long videos get a label instead of NaN.
    ``include_lowest=True`` puts a duration of 0 into the first bucket.

    The frame is modified in place and the same object is returned.
    """
    step_seconds = 300      # width of one bucket: 5 minutes
    minimum_buckets = 20    # always offer the categories up to '95-100 min'

    longest = df['duration'].max()
    if pd.isna(longest):
        needed_buckets = 0
    else:
        # Ceiling division without importing math: -(-a // b).
        needed_buckets = int(-(-longest // step_seconds))
    bucket_count = max(minimum_buckets, needed_buckets)

    bins = [step_seconds * i for i in range(bucket_count + 1)]
    labels = [f'{5 * i}-{5 * (i + 1)} min' for i in range(bucket_count)]
    df['duration_range'] = pd.cut(df['duration'], bins=bins, labels=labels, include_lowest=True)
    return df

In [26]:
# Attach the duration_range bucket column to every regional dataset
(
    trending_videos_AU,
    trending_videos_CA,
    trending_videos_GB,
    trending_videos_IN,
    trending_videos_US,
) = (
    add_duration_range_column(frame)
    for frame in (
        trending_videos_AU,
        trending_videos_CA,
        trending_videos_GB,
        trending_videos_IN,
        trending_videos_US,
    )
)
print("Duration Range Column Added Successfully")

Duration Range Column Added Successfully


In [27]:
# Function to add tag count column
def add_tag_count_column(df):
    """Add a 'tag_count' column holding the number of tags of each video.

    Cells of 'tags' that are not a list count as 0. The frame is modified in
    place and the same object is returned.
    """
    def _count_tags(cell):
        if isinstance(cell, list):
            return len(cell)
        return 0

    df['tag_count'] = df['tags'].map(_count_tags)
    return df

In [28]:
# Attach the tag_count column to every regional dataset
(
    trending_videos_AU,
    trending_videos_CA,
    trending_videos_GB,
    trending_videos_IN,
    trending_videos_US,
) = (
    add_tag_count_column(frame)
    for frame in (
        trending_videos_AU,
        trending_videos_CA,
        trending_videos_GB,
        trending_videos_IN,
        trending_videos_US,
    )
)
print("Tag Count Column Added Successfully")

Tag Count Column Added Successfully


In [29]:
# Function to add published hour column
def add_published_hour_column(df):
    """Add a 'published_hour' column with the hour component of 'published_at'.

    'published_at' must already be a datetime column (the ``.dt`` accessor is used).
    The frame is modified in place and the same object is returned.
    """
    publish_times = df['published_at']
    df['published_hour'] = publish_times.dt.hour
    return df

In [30]:
# Attach the published_hour column to every regional dataset
(
    trending_videos_AU,
    trending_videos_CA,
    trending_videos_GB,
    trending_videos_IN,
    trending_videos_US,
) = (
    add_published_hour_column(frame)
    for frame in (
        trending_videos_AU,
        trending_videos_CA,
        trending_videos_GB,
        trending_videos_IN,
        trending_videos_US,
    )
)
print("Published Hour Column Added Successfully")

Published Hour Column Added Successfully


In [31]:
# Preview the first five rows of the processed AU(Australia) dataset
trending_videos_AU.head(n=5)

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,view_count,like_count,comment_count,duration_range,tag_count,published_hour
0,wxZP0bdXYUU,Robert Whittaker knocks out Ikram Aliskerov in...,Watch as Robert Whittaker knocks out Ikram Ali...,2024-06-22 21:44:48+00:00,UCO4AcsPKEkIqDmbeiZLfd1A,ESPN MMA,17,Sports,"[Alexander Volkov, Sergey Pavlovich, robert wh...",63,289370,5306,933,0-5 min,14,21
1,Z7B7PpTOpDE,Aussie Tries American Fast Food for the First ...,Join me as I try American fast food for the fi...,2024-06-23 07:30:15+00:00,UCecAIXPb5KTJz5BFnUzlTaA,Spanian,24,Entertainment,"[Spanian, action bronson, american, best bites...",2438,404728,12134,2375,40-45 min,28,7
2,_WY42gatzVY,3 Days Solo Boat Camping To Catch Food For My ...,"A wild boat camping trip in remote Australia, ...",2024-06-23 10:00:23+00:00,UCSnwORddxZG1SyB8lSxWQGg,YBS Youngbloods,17,Sports,[],1708,421999,15742,778,25-30 min,0,10
3,bmzFk5-TT3w,Hiring a DJ for a Bikies Funeral PRANK,Thanks to Danny Rant's & Billboard for helping...,2024-06-22 21:00:24+00:00,UCEpHkpv4_CgZIEadjjOv4jA,Misfit Minds,22,People & Blogs,[],764,163324,9469,410,10-15 min,0,21
4,Sfpr_S8nVLA,I made the worlds most powerful soccer shoe,Get a free 14-day trial of Odoo's all-in-one b...,2024-06-22 15:00:22+00:00,UCJLZe_NoiG0hT7QCX_9vmqw,I did a thing,24,Entertainment,[],1290,2275736,125152,4457,20-25 min,0,15


In [32]:
# Column dtypes of the processed AU(Australia) dataset
data_types(df=trending_videos_AU)

video_id                       object
title                          object
description                    object
published_at      datetime64[ns, UTC]
channel_id                     object
channel_title                  object
category_id                     int64
category_name                  object
tags                           object
duration                        int64
view_count                      int64
like_count                      int64
comment_count                   int64
duration_range               category
tag_count                       int64
published_hour                  int32
dtype: object

In [33]:
# Preview the first five rows of the processed CA(Canada) dataset
trending_videos_CA.head(n=5)

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,view_count,like_count,comment_count,duration_range,tag_count,published_hour
0,gTkMhlrIl-4,House of the Dragon Season 2 | Episode 3 Previ...,Reason will be forgotten. A new episode of the...,2024-06-24 02:00:27+00:00,UCx-KWLTKlB83hDI6UKECtJQ,Max,24,Entertainment,"[max, hbo max, hbo, max originals, max streami...",66,546950,12793,1407,0-5 min,34,2
1,FyG21rXCxlY,Red Velvet 레드벨벳 'Cosmic' MV,"Red Velvet's new album ""Cosmic"" is out!\nListe...",2024-06-24 09:44:02+00:00,UCEf_Bc-KVd7onSeifS3py9g,SMTOWN,10,Music,"[Red Velvet, 레드벨벳, 레벨, RV, 아이린, 웬디, 슬기, 조이, 예리...",230,3242960,334848,19547,0-5 min,16,9
2,U0Mq3mJdwh4,MEGA BOXES ARE BACK!!!,This episode of Brawl Talk brings back the mos...,2024-06-22 15:00:18+00:00,UCooVYzDxdwTtGYAkcPmOgOw,Brawl Stars,20,Gaming,"[brawl stars, supercell game, megabox, mega, b...",533,26911535,1320553,166472,5-10 min,16,15
3,bcH7050gXc0,Six Days Camping in the Deep Wilderness - Catc...,https://mavoutdoors.com/\n\nBook a stay at my ...,2024-06-23 22:30:06+00:00,UCL_BZpt0J9Kqwy6YPWr30ow,Mav,24,Entertainment,"[Mav, Mavrikjoos, mavrik joos, Truck Camping]",3589,240585,10735,767,55-60 min,4,22
4,tAnC8Hvqhm8,Unlocking Unlimited Heroes | Community Event,Follow us on our Socials!\nTwitter ► https://t...,2024-06-24 07:58:00+00:00,UCD1Em4q90ZUK2R5HKesszJg,Clash of Clans,20,Gaming,"[clash of clans, COC, Clash of Clans Gameplay,...",107,381907,15959,1497,0-5 min,18,7


In [34]:
# Column dtypes of the processed CA(Canada) dataset
data_types(df=trending_videos_CA)

video_id                       object
title                          object
description                    object
published_at      datetime64[ns, UTC]
channel_id                     object
channel_title                  object
category_id                     int64
category_name                  object
tags                           object
duration                        int64
view_count                      int64
like_count                      int64
comment_count                   int64
duration_range               category
tag_count                       int64
published_hour                  int32
dtype: object

In [35]:
# Preview the first five rows of the processed GB(United Kingdom) dataset
trending_videos_GB.head(n=5)

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,view_count,like_count,comment_count,duration_range,tag_count,published_hour
0,e3HpmhBlKNQ,Mazza L20 - Fire in the Booth,"Mazza L20 is in the studio in London, UK for h...",2024-06-23 15:00:08+00:00,UCB5-9DHv6C6WMJOksBocwJQ,CharlieSloth,10,Music,"[charlie sloth, apple music, fire in the booth...",655,344777,17270,2314,10-15 min,15,15
1,BhwpIUm34n4,GUESS THE MUSICIAN!,Thank you to Azar for sponsoring this video! \...,2024-06-23 16:27:41+00:00,UCWZmCMB7mmKWcXJSIPRhzZw,Miniminter,24,Entertainment,"[simon, sidemen, miniminter, mm7games, random,...",1897,512136,27025,763,30-35 min,6,16
2,gTkMhlrIl-4,House of the Dragon Season 2 | Episode 3 Previ...,Reason will be forgotten. A new episode of the...,2024-06-24 02:00:27+00:00,UCx-KWLTKlB83hDI6UKECtJQ,Max,24,Entertainment,"[max, hbo max, hbo, max originals, max streami...",66,546950,12796,1407,0-5 min,34,2
3,FyG21rXCxlY,Red Velvet 레드벨벳 'Cosmic' MV,"Red Velvet's new album ""Cosmic"" is out!\nListe...",2024-06-24 09:44:02+00:00,UCEf_Bc-KVd7onSeifS3py9g,SMTOWN,10,Music,"[Red Velvet, 레드벨벳, 레벨, RV, 아이린, 웬디, 슬기, 조이, 예리...",230,3242960,334848,19547,0-5 min,16,9
4,z8_rVvmC6t8,Here's why I sold my Lotus Emira and bought th...,I recently decided to sell my Lotus Emira and ...,2024-06-23 16:00:50+00:00,UCIB5XXHNAWWzTOw6guIMYCg,Harry's garage,2,Autos & Vehicles,[],1355,359128,20309,1690,20-25 min,0,16


In [36]:
# Column dtypes of the processed GB(United Kingdom) dataset
data_types(df=trending_videos_GB)

video_id                       object
title                          object
description                    object
published_at      datetime64[ns, UTC]
channel_id                     object
channel_title                  object
category_id                     int64
category_name                  object
tags                           object
duration                        int64
view_count                      int64
like_count                      int64
comment_count                   int64
duration_range               category
tag_count                       int64
published_hour                  int32
dtype: object

In [37]:
# Preview the first five rows of the processed IN(India) dataset
trending_videos_IN.head(n=5)

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,view_count,like_count,comment_count,duration_range,tag_count,published_hour
0,nJDclWEjGPA,#TheGOATBdayShots | Thalapathy Vijay | Venkat ...,A special GOATbdayshots from the film The Grea...,2024-06-21 18:31:00+00:00,UC9WXzTgk10ncJX1eOxHElCg,AGS Entertainment,1,Film & Animation,"[Thalapathy Vijay, Thalapathy Vijay birthday, ...",51,8993996,524644,12375,0-5 min,19,18
1,4P_k0rqmyX8,Chinna Chinna Kangal (Lyrical) | The Greatest ...,Get ready to melt in this soul stirring melody...,2024-06-22 12:02:01+00:00,UCq-Fj5jknLsUf-MWSy4_brA,T-Series,10,Music,"[hindi songs 2024, hindi songs new, bollywood ...",282,6418847,578522,19950,0-5 min,20,12
2,b8Nt2_EpgvY,Anubhuti kharadina ra || Khordha toka || Funny...,Experience the joy of trading on Binomo and ea...,2024-06-24 04:27:52+00:00,UCmuSPlchjLraieuEs33mcVA,khordha toka,23,Comedy,"[khordha toka, new khordha toka vdo, dalema vd...",1365,413297,38097,1301,20-25 min,16,4
3,G4u0jS6SRTw,Armaan को पड़ा Kritika और Payal पर Comment मारन...,📲 Download JioCinema: https://bit.ly/3Be09Z3 \...,2024-06-23 13:30:11+00:00,UC8To9CFsZzvPafxMLzS08iA,JioCinema,24,Entertainment,"[bigg boss ott 3, bigg boss ott 3 contestants,...",40,1236788,10287,561,0-5 min,23,13
4,4N9N7tyR3gQ,ASKING MONEY FROM S8UL CREATORS,THANKS FOR WATCHING THE VIDEO. DON'T FORGET TO...,2024-06-24 04:30:03+00:00,UCPzIOAKAvphrnIfa9wFj69w,8bit MAMBA,24,Entertainment,"[8bit, 8bit mamba, mamba]",1168,542233,82977,2181,15-20 min,3,4


In [38]:
# Column dtypes of the processed IN(India) dataset
data_types(df=trending_videos_IN)

video_id                       object
title                          object
description                    object
published_at      datetime64[ns, UTC]
channel_id                     object
channel_title                  object
category_id                     int64
category_name                  object
tags                           object
duration                        int64
view_count                      int64
like_count                      int64
comment_count                   int64
duration_range               category
tag_count                       int64
published_hour                  int32
dtype: object

In [39]:
# Preview the first five rows of the processed US(United States) dataset
trending_videos_US.head(n=5)

Unnamed: 0,video_id,title,description,published_at,channel_id,channel_title,category_id,category_name,tags,duration,view_count,like_count,comment_count,duration_range,tag_count,published_hour
0,E_sVLtTODRk,Chicago Sky vs. Indiana Fever | FULL GAME HIGH...,The Chicago Sky and Indiana Fever competed in ...,2024-06-23 22:31:54+00:00,UCO9a_ryN_l7DIDS-VIt-zmw,WNBA,17,Sports,"[wnba, nba, basketball, women, highlights, ama...",570,513456,6506,2588,5-10 min,7,22
1,FyG21rXCxlY,Red Velvet 레드벨벳 'Cosmic' MV,"Red Velvet's new album ""Cosmic"" is out!\nListe...",2024-06-24 09:44:02+00:00,UCEf_Bc-KVd7onSeifS3py9g,SMTOWN,10,Music,"[Red Velvet, 레드벨벳, 레벨, RV, 아이린, 웬디, 슬기, 조이, 예리...",230,3242960,334863,19547,0-5 min,16,9
2,qReN9SFd35o,SHA'CARRI TO PARIS: Richardson SCORCHES 100m T...,"Sha'Carri Richardson, the world's fastest woma...",2024-06-23 03:38:48+00:00,UCqZQlzSHbVJrwrn5XvzrzcA,NBC Sports,17,Sports,"[olympics, nbc sports, track and field, shacar...",416,1260219,26439,2539,5-10 min,27,3
3,U0Mq3mJdwh4,MEGA BOXES ARE BACK!!!,This episode of Brawl Talk brings back the mos...,2024-06-22 15:00:18+00:00,UCooVYzDxdwTtGYAkcPmOgOw,Brawl Stars,20,Gaming,"[brawl stars, supercell game, megabox, mega, b...",533,26911535,1320553,166472,5-10 min,16,15
4,bcH7050gXc0,Six Days Camping in the Deep Wilderness - Catc...,https://mavoutdoors.com/\n\nBook a stay at my ...,2024-06-23 22:30:06+00:00,UCL_BZpt0J9Kqwy6YPWr30ow,Mav,24,Entertainment,"[Mav, Mavrikjoos, mavrik joos, Truck Camping]",3589,240585,10735,767,55-60 min,4,22


In [40]:
# Column dtypes of the processed US(United States) dataset
data_types(df=trending_videos_US)

video_id                       object
title                          object
description                    object
published_at      datetime64[ns, UTC]
channel_id                     object
channel_title                  object
category_id                     int64
category_name                  object
tags                           object
duration                        int64
view_count                      int64
like_count                      int64
comment_count                   int64
duration_range               category
tag_count                       int64
published_hour                  int32
dtype: object

In [41]:
# Marker cell: all preprocessing steps above have been applied to the five regional datasets
print("Preprocessing Completed Successfully")

Preprocessing Completed Successfully


In [42]:
# Function to update the csv file with the preprocessed data
def update_csv_file(df, region_code):
    """Write ``df`` to ``trending_videos_<region_code>.csv`` without the index column.

    An existing file with that name is overwritten. A confirmation line is
    printed afterwards; nothing is returned.
    """
    target_path = "trending_videos_{}.csv".format(region_code)
    df.to_csv(target_path, index=False)
    print("CSV file updated for {} region".format(region_code))

In [43]:
# Save each preprocessed regional dataset back to its CSV file
update_csv_file(df=trending_videos_AU, region_code='AU')
update_csv_file(df=trending_videos_CA, region_code='CA')
update_csv_file(df=trending_videos_GB, region_code='GB')
update_csv_file(df=trending_videos_IN, region_code='IN')
update_csv_file(df=trending_videos_US, region_code='US')

CSV file updated for AU region
CSV file updated for CA region
CSV file updated for GB region
CSV file updated for IN region
CSV file updated for US region
