### Data Cleaning and Analysing

In [1]:
import pandas as pd

In [2]:
# Load the trending-video dataset from a CSV file (path is relative to the
# notebook's working directory) and preview the first five rows.
yt_df = pd.read_csv('yt_trending_data.csv')
yt_df.head()

Unnamed: 0.1,Unnamed: 0,id,title,view,like,comment,category_id,duration_ios,channel_name,description,tag,published_at
0,0,pE1qTi_Lt8E,Vettaiyan - Prevue | Rajinikanth | Amitabh Bac...,4447442,215073,6850,24,PT1M38S,Sun TV,Vettaiyan - Prevue | Rajinikanth | Amitabh Bac...,"['vettaiyan official prevue', 'vettaiyan tease...",2024-09-20T13:39:03Z
1,1,364HfM-UT18,JAM | Part 2 | Karikku | Comedy,2781882,194930,6742,23,PT14M8S,Karikku,https://forms.gle/Nh4WRJqToFW8FRcYA\n\nഅക്കൗണ്...,"['karikku', 'malayalam', 'funny', 'comedy', 'w...",2024-09-20T12:29:58Z
2,2,Bdyjlkf_QYo,Shaadi ka Rishta - Looking for the ideal Dulhan,3291806,239537,15143,22,PT15M32S,Triggered Insaan,Today we are looking at some really funny matr...,"['triggered insaan', 'live insaan', 'funny', '...",2024-09-20T08:45:00Z
3,3,P68GjnVIcC4,Jafaa - Ep 18 - [CC] 20th Sep 2024 - Sponsored...,6189198,120255,2054,24,PT36M3S,HUM TV,UK Audience 🇬🇧 Book Your Tickets Now!! https:/...,"['pakistani serial', 'drama in hindi', 'latest...",2024-09-20T16:00:32Z
4,4,2zZuCBscrlI,Travis Head Strikes Brilliant 154 | Highlights...,3490462,65321,3201,17,PT15M4S,England & Wales Cricket Board,Go to ecb.co.uk to join We Are England Cricket...,"['cricket videos', 'highlights', 'cricket', 'e...",2024-09-19T19:28:48Z


In [3]:
# Parse the published_at timestamp strings (e.g. '2024-09-20T13:39:03Z')
# into pandas datetimes so the column can be sorted and compared as dates.
yt_df['published_at'] = pd.to_datetime(yt_df['published_at'])

In [4]:
# Drop the unwanted 'Unnamed: 0' column (it only repeats the row index, as the
# head() preview shows). errors='ignore' keeps this cell safe to re-run after
# the column has already been removed.
yt_df = yt_df.drop(columns='Unnamed: 0', errors='ignore')

In [5]:
# Summarise column dtypes and non-null counts to spot missing values.
yt_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype              
---  ------        --------------  -----              
 0   id            50 non-null     object             
 1   title         50 non-null     object             
 2   view          50 non-null     int64              
 3   like          50 non-null     int64              
 4   comment       50 non-null     int64              
 5   category_id   50 non-null     int64              
 6   duration_ios  50 non-null     object             
 7   channel_name  50 non-null     object             
 8   description   50 non-null     object             
 9   tag           46 non-null     object             
 10  published_at  50 non-null     datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), int64(4), object(6)
memory usage: 4.4+ KB


In [6]:
# Cast the like counts to an integer dtype.
# NOTE(review): info() above already reports 'like' as int64, so this looks
# redundant — confirm before removing.
yt_df['like'] = yt_df['like'].astype('int')

In [7]:
# Preview the frame after the cleaning steps.
yt_df.head()

Unnamed: 0,id,title,view,like,comment,category_id,duration_ios,channel_name,description,tag,published_at
0,pE1qTi_Lt8E,Vettaiyan - Prevue | Rajinikanth | Amitabh Bac...,4447442,215073,6850,24,PT1M38S,Sun TV,Vettaiyan - Prevue | Rajinikanth | Amitabh Bac...,"['vettaiyan official prevue', 'vettaiyan tease...",2024-09-20 13:39:03+00:00
1,364HfM-UT18,JAM | Part 2 | Karikku | Comedy,2781882,194930,6742,23,PT14M8S,Karikku,https://forms.gle/Nh4WRJqToFW8FRcYA\n\nഅക്കൗണ്...,"['karikku', 'malayalam', 'funny', 'comedy', 'w...",2024-09-20 12:29:58+00:00
2,Bdyjlkf_QYo,Shaadi ka Rishta - Looking for the ideal Dulhan,3291806,239537,15143,22,PT15M32S,Triggered Insaan,Today we are looking at some really funny matr...,"['triggered insaan', 'live insaan', 'funny', '...",2024-09-20 08:45:00+00:00
3,P68GjnVIcC4,Jafaa - Ep 18 - [CC] 20th Sep 2024 - Sponsored...,6189198,120255,2054,24,PT36M3S,HUM TV,UK Audience 🇬🇧 Book Your Tickets Now!! https:/...,"['pakistani serial', 'drama in hindi', 'latest...",2024-09-20 16:00:32+00:00
4,2zZuCBscrlI,Travis Head Strikes Brilliant 154 | Highlights...,3490462,65321,3201,17,PT15M4S,England & Wales Cricket Board,Go to ecb.co.uk to join We Are England Cricket...,"['cricket videos', 'highlights', 'cricket', 'e...",2024-09-19 19:28:48+00:00


In [8]:
# Lookup table from YouTube video category id to its human-readable name.
# Note that 'Comedy' appears twice (ids 23 and 34).
category_mapping = {
    1: 'Film & Animation',
    2: 'Autos & Vehicles',
    10: 'Music',
    15: 'Pets & Animals',
    17: 'Sports',
    18: 'Short Movies',
    19: 'Travel & Events',
    20: 'Gaming',
    21: 'Videoblogging',
    22: 'People & Blogs',
    23: 'Comedy',
    24: 'Entertainment',
    25: 'News & Politics',
    26: 'Howto & Style',
    27: 'Education',
    28: 'Science & Technology',
    29: 'Nonprofits & Activism',
    30: 'Movies',
    31: 'Anime/Animation',
    32: 'Action/Adventure',
    33: 'Classics',
    34: 'Comedy',
    35: 'Documentary',
    36: 'Drama',
    37: 'Family',
    38: 'Foreign',
    39: 'Horror',
    40: 'Sci-Fi/Fantasy',
    41: 'Thriller',
    42: 'Shorts',
    43: 'Shows',
    44: 'Trailers',  # was truncated to 'Trail'
}

In [9]:
# Translate each numeric category_id into its readable name via category_mapping.
# Ids that are missing from the mapping end up as NaN in category_name.
yt_df['category_name'] = yt_df['category_id'].map(category_mapping)

In [10]:
# function convert_duration
import re
def convert_duration(duration_str):
    """Convert an ISO 8601 time duration into a short human-readable string.

    Hours, minutes and seconds are each optional in the input, so values such
    as ``PT5M``, ``PT1H`` or ``PT1H30S`` are handled as well as the fully
    specified ``PT1H5M29S``. Missing minute/second components are shown as 0.

    Parameters
    ----------
    duration_str : str
        Duration starting with ``PT``, e.g. ``'PT1H17M4S'``, ``'PT14M8S'``
        or ``'PT45S'``.

    Returns
    -------
    str or None
        ``'<h>h <m>m <s>s'`` when an hour component is present, otherwise
        ``'<m>m <s>s'``. ``None`` when the input is not a string or contains
        no hour/minute/second component after ``PT`` (day-based durations such
        as ``P1DT2H`` are not supported).
    """
    # Missing values (None / NaN) cannot be parsed; report them as None
    # instead of letting re.match raise a TypeError.
    if not isinstance(duration_str, str):
        return None

    # Raw string avoids invalid-escape warnings for \d. Every component is
    # optional, so one pattern covers all H/M/S combinations.
    match = re.match(r'PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?', duration_str)
    if match is None:
        return None

    hours, minutes, seconds = match.groups()
    # A bare 'PT' prefix with no components is not a usable duration.
    if hours is None and minutes is None and seconds is None:
        return None

    minutes = minutes if minutes is not None else '0'
    seconds = seconds if seconds is not None else '0'

    if hours is not None:
        return f'{hours}h {minutes}m {seconds}s'
    return f'{minutes}m {seconds}s'

In [11]:
# Build a readable 'duration' column (e.g. '14m 8s') from the raw
# 'duration_ios' strings; values convert_duration cannot parse become None.
yt_df['duration'] = yt_df['duration_ios'].apply(convert_duration)

## Engagement Metrics

### Engagement Rate %

In [12]:
# Engagement rate: likes plus comments as a percentage of views, to 2 decimals.
yt_df['engagement_rate'] = (
    yt_df['like']
    .add(yt_df['comment'])
    .div(yt_df['view'])
    .mul(100)
    .round(2)
)

In [13]:
# Spot-check the newly added columns on the first two rows.
yt_df.head(2)

Unnamed: 0,id,title,view,like,comment,category_id,duration_ios,channel_name,description,tag,published_at,category_name,duration,engagement_rate
0,pE1qTi_Lt8E,Vettaiyan - Prevue | Rajinikanth | Amitabh Bac...,4447442,215073,6850,24,PT1M38S,Sun TV,Vettaiyan - Prevue | Rajinikanth | Amitabh Bac...,"['vettaiyan official prevue', 'vettaiyan tease...",2024-09-20 13:39:03+00:00,Entertainment,1m 38s,4.99
1,364HfM-UT18,JAM | Part 2 | Karikku | Comedy,2781882,194930,6742,23,PT14M8S,Karikku,https://forms.gle/Nh4WRJqToFW8FRcYA\n\nഅക്കൗണ്...,"['karikku', 'malayalam', 'funny', 'comedy', 'w...",2024-09-20 12:29:58+00:00,Comedy,14m 8s,7.25


### Like-Dislike Ratio (computed as likes / (likes + comments); the dataset has no dislike counts)

In [14]:
# NOTE(review): despite the column name, no dislike count is used here (the
# frame has no such column). The value is likes / (likes + comments), i.e. the
# share of likes among all interactions, rounded to 2 decimals.
yt_df['like_dislike_ratio'] = (
    yt_df['like']
    .div(yt_df['like'].add(yt_df['comment']))
    .round(2)
)
yt_df.head()

Unnamed: 0,id,title,view,like,comment,category_id,duration_ios,channel_name,description,tag,published_at,category_name,duration,engagement_rate,like_dislike_ratio
0,pE1qTi_Lt8E,Vettaiyan - Prevue | Rajinikanth | Amitabh Bac...,4447442,215073,6850,24,PT1M38S,Sun TV,Vettaiyan - Prevue | Rajinikanth | Amitabh Bac...,"['vettaiyan official prevue', 'vettaiyan tease...",2024-09-20 13:39:03+00:00,Entertainment,1m 38s,4.99,0.97
1,364HfM-UT18,JAM | Part 2 | Karikku | Comedy,2781882,194930,6742,23,PT14M8S,Karikku,https://forms.gle/Nh4WRJqToFW8FRcYA\n\nഅക്കൗണ്...,"['karikku', 'malayalam', 'funny', 'comedy', 'w...",2024-09-20 12:29:58+00:00,Comedy,14m 8s,7.25,0.97
2,Bdyjlkf_QYo,Shaadi ka Rishta - Looking for the ideal Dulhan,3291806,239537,15143,22,PT15M32S,Triggered Insaan,Today we are looking at some really funny matr...,"['triggered insaan', 'live insaan', 'funny', '...",2024-09-20 08:45:00+00:00,People & Blogs,15m 32s,7.74,0.94
3,P68GjnVIcC4,Jafaa - Ep 18 - [CC] 20th Sep 2024 - Sponsored...,6189198,120255,2054,24,PT36M3S,HUM TV,UK Audience 🇬🇧 Book Your Tickets Now!! https:/...,"['pakistani serial', 'drama in hindi', 'latest...",2024-09-20 16:00:32+00:00,Entertainment,36m 3s,1.98,0.98
4,2zZuCBscrlI,Travis Head Strikes Brilliant 154 | Highlights...,3490462,65321,3201,17,PT15M4S,England & Wales Cricket Board,Go to ecb.co.uk to join We Are England Cricket...,"['cricket videos', 'highlights', 'cricket', 'e...",2024-09-19 19:28:48+00:00,Sports,15m 4s,1.96,0.95


### View-Comment Ratio

In [15]:
# Views per comment, rounded to 2 decimals (higher = fewer comments per view).
yt_df['views_comment_ratio'] = yt_df['view'].div(yt_df['comment']).round(2)
yt_df.head(2)

Unnamed: 0,id,title,view,like,comment,category_id,duration_ios,channel_name,description,tag,published_at,category_name,duration,engagement_rate,like_dislike_ratio,views_comment_ratio
0,pE1qTi_Lt8E,Vettaiyan - Prevue | Rajinikanth | Amitabh Bac...,4447442,215073,6850,24,PT1M38S,Sun TV,Vettaiyan - Prevue | Rajinikanth | Amitabh Bac...,"['vettaiyan official prevue', 'vettaiyan tease...",2024-09-20 13:39:03+00:00,Entertainment,1m 38s,4.99,0.97,649.26
1,364HfM-UT18,JAM | Part 2 | Karikku | Comedy,2781882,194930,6742,23,PT14M8S,Karikku,https://forms.gle/Nh4WRJqToFW8FRcYA\n\nഅക്കൗണ്...,"['karikku', 'malayalam', 'funny', 'comedy', 'w...",2024-09-20 12:29:58+00:00,Comedy,14m 8s,7.25,0.97,412.62


### Highest & Lowest View Video

In [16]:
# idxmax() gives the row label of the largest view count; .loc then pulls the
# title and view count of that row as a Series.
hightest_view_video = yt_df.loc[yt_df['view'].idxmax(), ['title', 'view']]
# (yt_df['view'].max() would return only the number, without the title.)
hightest_view_video

title    Kabhi Main Kabhi Tum Episode 21 | Fahad Mustaf...
view                                              19930722
Name: 29, dtype: object

In [17]:
# idxmin() gives the row label of the smallest view count; .loc then pulls the
# title and view count of that row as a Series.
lowest_view_video = yt_df.loc[yt_df['view'].idxmin(), ['title', 'view']]
# (yt_df['view'].min() would return only the number, without the title.)
lowest_view_video

title    Advocate Mathivathani Speech about Periyar | U...
view                                                 47334
Name: 48, dtype: object

### Top 5 Videos by Engagement Rate

In [18]:
# The five rows with the highest engagement_rate, shown with their titles.
top_engagement_video = yt_df.nlargest(n=5, columns='engagement_rate')
top_engagement_video.loc[:, ['title', 'engagement_rate']]

Unnamed: 0,title,engagement_rate
11,IPHONE 16 PRO MAX GAMING TEST : BGMI PERFORMAN...,11.2
6,200 STREAK CHALLENGE IN FREE FIRE 😱🔥 YouTubers...,10.8
2,Shaadi ka Rishta - Looking for the ideal Dulhan,7.74
10,OUT OF SYLLABUS | Jaspreet Singh Standup Comedy,7.72
42,Reacting to Funniest India’s got Latent Memes 😂,7.72


### Average Likes per Video

In [19]:
# Mean number of likes across all videos in the frame.
avg_likes_per_video = yt_df['like'].mean()
# The original variable was misleadingly called avg_views_per_video although it
# holds likes; keep that name as an alias so any later reference still works.
avg_views_per_video = avg_likes_per_video
print('Average Likes per Video:', avg_likes_per_video)

Average Likes per Video: 82151.12


### Most Commented Video

In [20]:
# Select the full row of the video with the largest comment count, then show
# just its title and comment count.
most_commented_video = yt_df.loc[yt_df['comment'].idxmax()]
most_commented_video.loc[['title', 'comment']]

title      INDIA'S GOT LATENT | EP 06 ft. @VipulGoyal @Jo...
comment                                                20060
Name: 41, dtype: object

## Category Analysis

### All categories list

In [21]:
# Distinct category names, in order of first appearance in the frame.
category_list = yt_df['category_name'].drop_duplicates().tolist()
category_list

['Entertainment',
 'Comedy',
 'People & Blogs',
 'Sports',
 'Music',
 'Gaming',
 'Howto & Style',
 'Science & Technology']

### Average Views per Category

In [22]:
# Mean views per category (2 decimals), highest first, as a two-column frame.
avg_per_cat = (
    yt_df
    .groupby('category_name')['view']
    .mean()
    .round(2)
    .sort_values(ascending=False)
    .reset_index()
)
avg_per_cat

Unnamed: 0,category_name,view
0,Entertainment,3230841.17
1,Music,2101176.29
2,Sports,1871031.5
3,Science & Technology,1755673.33
4,People & Blogs,1521070.33
5,Comedy,1200071.0
6,Howto & Style,748927.0
7,Gaming,602237.0


### One Hour video

In [23]:
# NOTE(review): this reassigns yt_df, so rows without a parsed duration are
# removed for every later cell (comment filter, top channels, correlation,
# CSV export), not just for this section — confirm that is intended.
yt_df = yt_df.dropna(subset=['duration'])
# Formatted durations contain an 'h' only when an hour component is present,
# so this selects videos lasting one hour or more.
one_hr_video = yt_df[yt_df['duration'].str.contains('h')]
one_hr_video

Unnamed: 0,id,title,view,like,comment,category_id,duration_ios,channel_name,description,tag,published_at,category_name,duration,engagement_rate,like_dislike_ratio,views_comment_ratio
25,eUpGeIm536U,"Krushna & Kashmera: Love, Roasts, and Epic Bac...",1567883,51088,2462,23,PT1H17M4S,BHARTI TV,"Hello Ji, Namaste Dooston Bharti TV Podcast Me...","['bharti laughterqueen podcast', 'bharti harsh...",2024-09-20 03:30:07+00:00,Comedy,1h 17m 4s,3.42,0.95,636.83
41,aQpJ7OgXVn8,INDIA'S GOT LATENT | EP 06 ft. @VipulGoyal @Jo...,11033087,554529,20060,24,PT1H5M29S,Samay Raina,Become a MEMBER by clicking below!!!\nhttps://...,"['samay raina', 'samay raina comedy', 'samay r...",2024-09-15 16:00:09+00:00,Entertainment,1h 5m 29s,5.21,0.97,550.0


### Videos with 100 or Fewer Comments

In [24]:
# Videos with at most 100 comments. The comparison is inclusive (<= 100), and
# despite the variable name the result is not limited to videos with zero comments.
video_no_comments = yt_df[yt_df['comment'] <= 100]
video_no_comments

Unnamed: 0,id,title,view,like,comment,category_id,duration_ios,channel_name,description,tag,published_at,category_name,duration,engagement_rate,like_dislike_ratio,views_comment_ratio
9,A5nzekj9PN0,ஸ்ரீனிவாச கோவிந்தா | புரட்டாசி சனிக்கிழமை கேளு...,77654,526,25,10,PT44M9S,Sakthi Audio,அபூர்வா ஆடியோ பக்தியுடன் வழங்கும் நினைத்ததை நி...,"['angalamman pambai udukkai songs', 'amman pam...",2024-09-20 23:30:04+00:00,Music,44m 9s,0.71,0.95,3106.16
33,8s523BVDeVQ,చుక్క నూనె లేకుండా హెల్దీ లంచ్ బాక్స్ రిసిపీ అ...,104393,1591,93,22,PT10M3S,Manalo Mana Maata,THE INDUS VALLEY🤩💚\nGet up to 40% discount + E...,"['amma chethi vanta', 'lunch box recipes', 'lu...",2024-09-21 05:30:04+00:00,People & Blogs,10m 3s,1.61,0.94,1122.51
46,fbjgibI2frk,Before Marriage Vs After Marriage | EMI,67720,1866,39,23,PT13M51S,EMI,#EMItamil #emiimsaigal #beforemarriagevsafterm...,"['EMI', 'Emi Youtube Channel', 'Emi channel', ...",2024-09-20 14:00:42+00:00,Comedy,13m 51s,2.81,0.98,1736.41


### Top Channels

In [25]:
# Total views per channel across its trending videos; keep the five largest.
top_channels = (
    yt_df
    .groupby('channel_name')['view']
    .sum()
    .nlargest(5)
    .reset_index()
)
top_channels

Unnamed: 0,channel_name,view
0,HAR PAL GEO,22083950
1,ARY Digital HD,19930722
2,Samay Raina,11033087
3,T-Series Tamil,9766825
4,HUM TV,6189198


###  Engagement Rate vs. Views Correlation

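 The correlation coefficient is about -0.15, a weak negative relationship: as views increase, the engagement_rate tends to decrease only slightly. With only 50 videos in the sample, this should be read as a hint rather than a firm conclusion.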

In [26]:
# Pairwise correlation matrix between engagement_rate and view
# (DataFrame.corr() uses the Pearson coefficient by default).
engagement_rate_views_corr = yt_df[['engagement_rate', 'view']].corr()
engagement_rate_views_corr

Unnamed: 0,engagement_rate,view
engagement_rate,1.0,-0.151404
view,-0.151404,1.0


In [27]:
# Keep only the analysis-ready columns, then export them without the row index;
# float columns are written with two decimal places.
yt_analysis_df = yt_df.drop(
    columns=['duration_ios', 'description', 'tag', 'category_id'],
)
yt_analysis_df.to_csv(
    'youtube_trending_analysis.csv',
    index=False,
    float_format='%.2f',
)

In [28]:
# Preview the exported frame.
yt_analysis_df.head()

Unnamed: 0,id,title,view,like,comment,channel_name,published_at,category_name,duration,engagement_rate,like_dislike_ratio,views_comment_ratio
0,pE1qTi_Lt8E,Vettaiyan - Prevue | Rajinikanth | Amitabh Bac...,4447442,215073,6850,Sun TV,2024-09-20 13:39:03+00:00,Entertainment,1m 38s,4.99,0.97,649.26
1,364HfM-UT18,JAM | Part 2 | Karikku | Comedy,2781882,194930,6742,Karikku,2024-09-20 12:29:58+00:00,Comedy,14m 8s,7.25,0.97,412.62
2,Bdyjlkf_QYo,Shaadi ka Rishta - Looking for the ideal Dulhan,3291806,239537,15143,Triggered Insaan,2024-09-20 08:45:00+00:00,People & Blogs,15m 32s,7.74,0.94,217.38
3,P68GjnVIcC4,Jafaa - Ep 18 - [CC] 20th Sep 2024 - Sponsored...,6189198,120255,2054,HUM TV,2024-09-20 16:00:32+00:00,Entertainment,36m 3s,1.98,0.98,3013.24
4,2zZuCBscrlI,Travis Head Strikes Brilliant 154 | Highlights...,3490462,65321,3201,England & Wales Cricket Board,2024-09-19 19:28:48+00:00,Sports,15m 4s,1.96,0.95,1090.43
