In [305]:
import json
import pandas as pd
import re
import numpy as np
import seaborn as sns

In [306]:
# "Data is included for the IN, US, GB, DE, CA, FR, RU, BR, MX, KR, and JP regions
# (India, USA, Great Britain, Germany, Canada, France, Russia, Brazil, Mexico, South Korea, and Japan, respectively)."
# Set `region` to one of the two-letter codes listed above, written as a quoted string.
region = "US"

#display_limit = 1000

The code below checks that the required data files exist and performs some initialization.

In [307]:
from os import listdir

# Maps the first two characters of each CSV file name (used below as the region code)
# to that file's path under ./data.
data_files_location = {}
# Maps each category id to its title, merged across every JSON file found in ./data.
categories = {}

for f in listdir('./data'):
    if f.endswith('.json'):
        # Open through a context manager so the file handle is closed as soon as the
        # JSON is parsed, and read it as UTF-8 explicitly so parsing does not depend
        # on the platform's default text encoding.
        with open(f'./data/{f}', encoding='utf-8') as category_file:
            temp = json.load(category_file)
        for i in temp['items']:
            categories[i['id']] = i['snippet']['title']
    if f.endswith('.csv'):
        data_files_location[f[:2]] = f'./data/{f}'

# Stop early if either kind of input file is missing, since every later cell needs both.
if not categories:
    raise FileNotFoundError('can not locate any json category files')
    
if not data_files_location:
    raise FileNotFoundError('not CSV data files are located')

Load the CSV file for the selected region into a pandas DataFrame, and
map the category ids to the category titles read from the .json files in a new column, labeled 'videoCategory'.

In [330]:
# Fail with an actionable message when no CSV file was discovered for the selected region,
# instead of a bare KeyError that only echoes the region code.
if region not in data_files_location:
    raise KeyError(
        f'no CSV data file found for region {region!r}; '
        f'available regions: {sorted(data_files_location)}'
    )

# Load the region's records and drop the columns this analysis does not use.
# The drop is chained (not in-place) so the cell builds `data` in a single expression.
data = pd.read_csv(data_files_location[region]).drop(
    columns=['thumbnail_link', 'comments_disabled', 'ratings_disabled', 'comment_count']
)

# Category ids are cast to str before the lookup; presumably the ids stored as keys of
# `categories` are strings as read from the JSON files. Ids without a match become NaN.
data['videoCategory'] = data['categoryId'].astype(str).map(categories)

data

Unnamed: 0,video_id,title,publishedAt,channelId,channelTitle,categoryId,trending_date,tags,view_count,likes,dislikes,description,videoCategory
0,3C66w5Z0ixs,I ASKED HER TO BE MY GIRLFRIEND...,2020-08-11T19:20:14Z,UCvtRTOMP2TqYqu51xNrqAzg,Brawadis,22,2020-08-12T00:00:00Z,brawadis|prank|basketball|skits|ghost|funny vi...,1514614,156908,5855,SUBSCRIBE to BRAWADIS ▶ http://bit.ly/Subscrib...,People & Blogs
1,M9Pmf9AB4Mo,Apex Legends | Stories from the Outlands – “Th...,2020-08-11T17:00:10Z,UC0ZV6M2THA81QT9hrVWJG3A,Apex Legends,20,2020-08-12T00:00:00Z,Apex Legends|Apex Legends characters|new Apex ...,2381688,146739,2794,"While running her own modding shop, Ramya Pare...",Gaming
2,J78aPJ3VyNs,I left youtube for a month and THIS is what ha...,2020-08-11T16:34:06Z,UCYzPXprvl5Y-Sf0g4vX-m6g,jacksepticeye,24,2020-08-12T00:00:00Z,jacksepticeye|funny|funny meme|memes|jacksepti...,2038853,353787,2628,I left youtube for a month and this is what ha...,Entertainment
3,kXLn3HkpjaA,XXL 2020 Freshman Class Revealed - Official An...,2020-08-11T16:38:55Z,UCbg_UMjlHJg_19SZckaKajg,XXL,10,2020-08-12T00:00:00Z,xxl freshman|xxl freshmen|2020 xxl freshman|20...,496771,23251,1856,Subscribe to XXL → http://bit.ly/subscribe-xxl...,Music
4,VIUo6yapDbc,Ultimate DIY Home Movie Theater for The LaBran...,2020-08-11T15:10:05Z,UCDVPcEbVLQgLZX0Rt6jo34A,Mr. Kate,26,2020-08-12T00:00:00Z,The LaBrant Family|DIY|Interior Design|Makeove...,1123889,45802,964,Transforming The LaBrant Family's empty white ...,Howto & Style
...,...,...,...,...,...,...,...,...,...,...,...,...,...
87586,FdzPV69i4Aw,Great Idea 💡 #shorts #tips #short,2021-10-12T12:00:12Z,UCGLDtG2tl0uG8P0eNstbLUA,Tool_Tips,26,2021-10-17T00:00:00Z,[None],3241407,27687,9037,,Howto & Style
87587,40_wdosHJaQ,I Was In The Funniest Minecraft Competition Wi...,2021-10-12T00:15:10Z,UCFbZ2e9IrPejOdp8wsKUxvA,QuackiTwo,24,2021-10-17T00:00:00Z,Quackity|Quackitwo|Quackity Second Channel|Qua...,425760,48882,215,STREAMED LIVE ON TWITCH: https://www.twitch.tv...,Entertainment
87588,cNG1QNQPlNE,Summoner Changes | FFXIV Endwalker Media Tour,2021-10-13T12:00:29Z,UCQjKMGUEzBmEHltb1OIMLUg,Larryzaur,20,2021-10-17T00:00:00Z,ffxiv|final fantasy xiv|ff14|final fantasy 14|...,203634,7065,52,Endwalker Media Tour Playlist ► https://www.yo...,Gaming
87589,iYTK_1Sq8ZQ,Reaper Overview | FFXIV Endwalker Media Tour,2021-10-13T12:00:32Z,UCQjKMGUEzBmEHltb1OIMLUg,Larryzaur,20,2021-10-17T00:00:00Z,ffxiv|final fantasy xiv|ff14|final fantasy 14|...,210207,6884,43,Endwalker Media Tour Playlist ► https://www.yo...,Gaming


The number of trending entries in each video category:

In [334]:
# Tally how many rows fall into each category title, most frequent first.
data['videoCategory'].value_counts()

Entertainment            17615
Music                    16552
Gaming                   14458
Sports                    9574
People & Blogs            7524
Comedy                    4721
Film & Animation          3364
News & Politics           3315
Science & Technology      3199
Howto & Style             2933
Education                 1870
Autos & Vehicles          1556
Pets & Animals             474
Travel & Events            351
Nonprofits & Activism       85
Name: videoCategory, dtype: int64

#### Filtering Out Duplicate Videos
Many videos have trended more than once, so they appear in several rows. Here, we filter out the repeated entries and keep a single row per video: the most recent entry, with the latest view, like, and dislike counts (selected below as the row with the highest view count).

- There are 87,591 entries in the top-trending dataset with:
    - 15,180 unique videos
    - 4,252 unique channels


In [335]:
# Count the distinct video ids present in the dataset.
unique_videos = data['video_id'].nunique()
unique_videos

15180

In [336]:
# Count the distinct channel ids present in the dataset.
unique_channels = data['channelId'].nunique()
unique_channels

4252

In [325]:
# One row per video: order rows from most to least viewed, then keep only the first
# row encountered for each video id, i.e. the one with the highest view count.
# NOTE(review): this is presumably the latest snapshot of each video, but nothing here
# checks trending_date - confirm if "most recent" must be exact.
final_views = (
    data
    .sort_values(by='view_count', ascending=False)
    .drop_duplicates(subset=['video_id'], keep='first')
)
final_views

Unnamed: 0,video_id,title,publishedAt,channelId,channelTitle,categoryId,trending_date,tags,view_count,likes,dislikes,description,videoCategory
56374,WMweEpGlu_U,BTS (방탄소년단) 'Butter' Official MV,2021-05-21T03:46:13Z,UC3IZKseVpdzPSBaWxBxundA,HYBE LABELS,10,2021-05-30T00:00:00Z,BIGHIT|빅히트|방탄소년단|BTS|BANGTAN|방탄,264407389,16021534,150989,BTS (방탄소년단) 'Butter' Official MV Credits: Dire...,Music
3358,gdZLi9oWNZg,BTS (방탄소년단) 'Dynamite' Official MV,2020-08-21T03:58:10Z,UC3IZKseVpdzPSBaWxBxundA,Big Hit Labels,10,2020-08-28T00:00:00Z,BIGHIT|빅히트|방탄소년단|BTS|BANGTAN|방탄,232649205,15735533,714194,BTS (방탄소년단) 'Dynamite' Official MVCredits:Dire...,Music
73564,hdmx71UjBXs,Turn into orbeez - Tutorial #Shorts,2021-07-03T04:04:57Z,UCt8z2S30Wl-GQEluFVM8NUw,FFUNTV,24,2021-08-08T00:00:00Z,[None],206202284,6840430,240769,Turn into orbeez - Tutorial #ShortsHey guys! W...,Entertainment
4980,vRXZj0DzXIA,BLACKPINK - 'Ice Cream (with Selena Gomez)' M/V,2020-08-28T04:00:11Z,UCOmHUn--16B90oW2L6FRR3A,BLACKPINK,10,2020-09-05T00:00:00Z,YG Entertainment|YG|와이지|K-pop|BLACKPINK|블랙핑크|블...,184778248,11795670,879354,BLACKPINK - ‘Ice Cream (with Selena Gomez)’Com...,Music
68979,CuklIb9d3fI,BTS (방탄소년단) 'Permission to Dance' Official MV,2021-07-09T03:59:12Z,UC3IZKseVpdzPSBaWxBxundA,HYBE LABELS,10,2021-07-16T00:00:00Z,HYBE|HYBE LABELS|하이브|하이브레이블즈,156482499,12117314,102132,BTS (방탄소년단) 'Permission to Dance' Official MVC...,Music
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15289,r7nYQXsxJdU,HBCU Homecoming 2020: Meet Me On The Yard,2020-10-25T01:40:31Z,UCqVDpXKLmKeBU_yyt_QkItQ,YouTube Originals,24,2020-10-27T00:00:00Z,2 CHAINZ|DESI BANKS|LIONEL RICHIE |LANCE GROSS...,0,4257,384,HBCU Homecoming 2020 is going down! We welcome...,Entertainment
65049,kmk5vciFbek,Demi Lovato performs their greatest hits this ...,2021-06-26T00:07:19Z,UCZkURf9tDolFOeuw_4RD7XQ,Demi Lovato,24,2021-06-27T00:00:00Z,Pride|Pride 2021|YouTube Pride|YouTube Pride 2...,0,0,0,"On June 25, celebrate Pride 2021 with Demi Lov...",Entertainment
74964,Hb3rmh-_FMw,Introducing the shorter side of YouTube,2021-08-10T15:04:25Z,UCBR8-60-B28hp2BmDPdntcQ,YouTube,27,2021-08-15T00:00:00Z,[None],0,22585,1636,*****EPILEPSY WARNING ********Watch and create...,Education
73992,HcSwBJY7Xew,Watch The Weeknd and create short videos on th...,2021-08-10T15:08:22Z,UCBR8-60-B28hp2BmDPdntcQ,YouTube,27,2021-08-11T00:00:00Z,[None],0,50071,7256,,Education


In [339]:
# One row per video: order rows from least to most viewed, then keep only the first
# row encountered for each video id, i.e. the one with the lowest view count.
initial_views = (
    data
    .sort_values(by='view_count', ascending=True)
    .drop_duplicates(subset=['video_id'], keep='first')
)
initial_views

AttributeError: 'DataFrame' object has no attribute 'to_list'

Entertainment            17615
Music                    16552
Gaming                   14458
Sports                    9574
People & Blogs            7524
Comedy                    4721
Film & Animation          3364
News & Politics           3315
Science & Technology      3199
Howto & Style             2933
Education                 1870
Autos & Vehicles          1556
Pets & Animals             474
Travel & Events            351
Nonprofits & Activism       85
Name: videoCategory, dtype: int64