In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
channels = pd.read_csv('channels_data_2023-03-17.csv')
videos = pd.read_csv('video_data_2023-03-17.csv')

In [3]:
channels.head()

Unnamed: 0,channel_id,title,description,published_at,uploads_id,view_count,subscriber_count,video_count
0,UCkyBtxQh0H5yOU5RjtHNr-w,Vania Ice,Vania Ice is a Burundian Female Artist Singer/...,2018-12-01 06:09:35+00:00,UUkyBtxQh0H5yOU5RjtHNr-w,425724,13200,8
1,UCMrF7OdgLb18wjk8U-wB5sg,Trey Zo & Rappy Boy,#dudeclickmadeboom\nTwo blood brothers doing m...,2016-02-19 23:54:25+00:00,UUMrF7OdgLb18wjk8U-wB5sg,2755721,36900,8
2,UCRuEBttZI9Q3oWwyj9F0byg,El Pro Burundi,,2015-11-03 09:57:14+00:00,UURuEBttZI9Q3oWwyj9F0byg,781401,25100,18
3,UCqFJ9aC_2uHG6c9YRnAW5zA,Big Fizzo Official,Welcome to the official YouTube channel of Big...,2016-04-08 20:27:08+00:00,UUqFJ9aC_2uHG6c9YRnAW5zA,6971548,112000,60
4,UChFNdXDc35jvpqY_BZUVpwg,Thizzy official,Artiste/chanteur,2016-02-22 15:52:06+00:00,UUhFNdXDc35jvpqY_BZUVpwg,91726,3260,15


# Questions this data answers

In [24]:
# Question 1: What is the average number of views per channel?
avg_views_per_channel = channels['view_count'].mean()
print(f"The average number of views per channel: {avg_views_per_channel}")
# Question 2: What is the average number of subscribers per channel?
avg_subscribers_per_channel = channels['subscriber_count'].mean()
print(f"The average number of subscribers per channel: {avg_subscribers_per_channel}")
# Question 3: Which channel has the highest number of subscribers?
max_subscribers_channel = channels.loc[channels['subscriber_count'].idxmax(), 'title']
print(f"The channel with the highest number of subscribers is: {max_subscribers_channel}")
# Question 4: Which channel has the highest number of views?
max_views_channel = channels.loc[channels['view_count'].idxmax(), 'title']
print(f"The channel with the highest number of views is: {max_views_channel}")
# Question 5: How many channels have less than 10,000 subscribers?
num_channels_less_than_10k = (channels['subscriber_count'] < 10000).sum()
print(f"The number of channels with less than 10,000 subscribers is: {num_channels_less_than_10k}")
# Question 6: What is the average video count per channel?
avg_video_count_per_channel = channels['video_count'].mean()
print(f"The average video count per channel is: {avg_video_count_per_channel}")
# Question 7: What is the total number of views for all channels combined?
total_views = channels['view_count'].sum()
print(f"The total number of views for all channels combined is: {total_views}")
# Question 8: Is there a correlation between the number of subscribers and the number of views?
corr = channels['subscriber_count'].corr(channels['view_count'])
print(f"The correlation between subscriber count and view count is: {corr}")
# Question 9: Is there a relationship between the age of a channel (based on "published_at") and the number of subscribers or views?
# Calculate the age of the channel in years
import datetime
now = pd.Timestamp.now(tz='UTC')
channels['published_at'] = pd.to_datetime(channels['published_at']).dt.tz_localize(None).dt.tz_localize('UTC')
ages = pd.DataFrame()
ages['age'] = (now - channels['published_at']).dt.days / 365.25

# Calculate the correlation between age and subscriber count
corr_sub = ages['age'].corr(channels['subscriber_count'])
print(f"The correlation between age and subscriber count is: {corr_sub}")

# Calculate the correlation between age and view count
corr_view = ages['age'].corr(channels['view_count'])
print(f"The correlation between age and view count is: {corr_view}")

The average number of views per channel: 3029300.066666667
The average number of subscribers per channel: 47031.0
The channel with the highest number of subscribers is: Sat-B
The channel with the highest number of views is: Sat-B
The number of channels with less than 10,000 subscribers is: 4
The average video count per channel is: 37.266666666666666
The total number of views for all channels combined is: 90879002
The correlation between subscriber count and view count is: 0.9703261979698973
The correlation between age and subscriber count is: 0.3138557226457533
The correlation between age and view count is: 0.35493297332893164


## Visualization



### Compare the number of views, subscribers, and videos for each channel.

In [32]:
import plotly.graph_objs as go

# Sort the data by the selected column
df_views = channels.sort_values(by='view_count', ascending=False)
df_subs = channels.sort_values(by='subscriber_count', ascending=False)
df_videos = channels.sort_values(by='video_count', ascending=False)

# Create data for the bar plots
views_data = go.Bar(x=df_views['title'], y=df_views['view_count'], name='Views')
subscribers_data = go.Bar(x=df_subs['title'], y=df_subs['subscriber_count'], name='Subscribers')
videos_data = go.Bar(x=df_videos['title'], y=df_videos['video_count'], name='Videos')

# Create layout for the plots
layout = go.Layout(title='Comparison of Views, Subscribers, and Videos per Channel')

# Create figure and add subplots for each bar plot
fig = go.Figure(data=[views_data, subscribers_data, videos_data], layout=layout)
fig.update_layout(barmode='group')
fig.show()

In [35]:
# The relationship between the views, subs, videos
import plotly.express as px

fig = px.scatter(channels, x="view_count", y="subscriber_count",
                 size="video_count", hover_data=['title'])

fig.update_layout(title="Relationship between Views and Subscribers",
                  xaxis_title="View Count",
                  yaxis_title="Subscriber Count")
fig.show()

In [38]:
fig = px.scatter(channels, x='video_count', y='view_count', hover_data=['title'],
                 title='Relationship Between Video Count and View Count')
fig.update_layout(title="Relationship between Views and Video Counts",
                  xaxis_title="Video Count",
                  yaxis_title="View Count")

fig.show()

In [49]:
fig = px.histogram(channels, x="view_count", nbins=20, title="Distribution of Views")
fig.show()

fig = px.histogram(channels, x="video_count", nbins=20, title="Distribution of Videos")
fig.show()

fig = px.histogram(channels, x="subscriber_count", nbins=20, title="Distribution of Subscribers")
fig.show()

In [48]:
ig = px.histogram(channels, x="view_count", nbins=20, title="Distribution of Views")
fig.show()


In [50]:
# Sort the data by the published date
df_sorted = channels.sort_values(by='published_at')

# Create data for the line plots
views_data = go.Scatter(x=df_sorted['published_at'], y=df_sorted['view_count'], name='Views', mode='lines')
subscribers_data = go.Scatter(x=df_sorted['published_at'], y=df_sorted['subscriber_count'], name='Subscribers', mode='lines')
videos_data = go.Scatter(x=df_sorted['published_at'], y=df_sorted['video_count'], name='Videos', mode='lines')

# Create layout for the plot
layout = go.Layout(title='Trend of Views, Subscribers, and Videos Over Time', xaxis_title='Published Date', yaxis_title='Count')

# Create figure and add subplots for each line plot
fig = go.Figure(data=[views_data, subscribers_data, videos_data], layout=layout)
fig.show()

In [51]:
# Create the heatmap data
heatmap_data = go.Heatmap(
    z=channels.corr(),
    x=channels.columns,
    y=channels.columns,
    colorscale='RdBu',
)

# Set the layout for the heatmap
layout = go.Layout(
    title='Correlation Heatmap',
)

# Create the figure
fig = go.Figure(data=[heatmap_data], layout=layout)

# Show the figure
fig.show()





In [62]:
# Top 10 viewed videos
most_viewed_video = videos.loc[videos['view_count'].idxmax()]
most_viewed_video

video_id                                                  q1_lKXtF0rU
title               Sat-B - Beautiful ft Meddy (Official Lyrics Vi...
channel_id                                   UCtYLotDCyNHvwjnEiTIr7iA
description         Sat-B presents the official lyrics visualizer ...
tags                ['Sat-B', 'Meddy', 'Beautiful', 'Empire Avenue...
published_at                                2020-12-14 16:41:20+00:00
duration                                                      PT3M44S
view_count                                                    4235761
like_count                                                      39944
comment_count                                                    2625
categories          ['Christian Music', 'Hip Hop Music', 'Music', ...
duration_seconds                                                224.0
producers                                                          []
directors                                                          []
Name: 826, dtype: ob

In [63]:
videos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1188 entries, 0 to 1187
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   video_id          1188 non-null   object 
 1   title             1188 non-null   object 
 2   channel_id        1188 non-null   object 
 3   description       1002 non-null   object 
 4   tags              1188 non-null   object 
 5   published_at      1188 non-null   object 
 6   duration          1188 non-null   object 
 7   view_count        1188 non-null   int64  
 8   like_count        1188 non-null   int64  
 9   comment_count     1188 non-null   int64  
 10  categories        1188 non-null   object 
 11  duration_seconds  1188 non-null   float64
 12  producers         1188 non-null   object 
 13  directors         1188 non-null   object 
dtypes: float64(1), int64(3), object(10)
memory usage: 130.1+ KB


In [65]:
videos['published_at'] = pd.to_datetime(videos['published_at'])

In [66]:
videos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1188 entries, 0 to 1187
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype              
---  ------            --------------  -----              
 0   video_id          1188 non-null   object             
 1   title             1188 non-null   object             
 2   channel_id        1188 non-null   object             
 3   description       1002 non-null   object             
 4   tags              1188 non-null   object             
 5   published_at      1188 non-null   datetime64[ns, UTC]
 6   duration          1188 non-null   object             
 7   view_count        1188 non-null   int64              
 8   like_count        1188 non-null   int64              
 9   comment_count     1188 non-null   int64              
 10  categories        1188 non-null   object             
 11  duration_seconds  1188 non-null   float64            
 12  producers         1188 non-null   object             
 13  dir

In [73]:
from datetime import datetime, timedelta

# Filter for videos published in the last 2 weeks
two_weeks_ago = datetime.now() - timedelta(weeks=2)
df = videos[videos['published_at'] >= two_weeks_ago]

# Calculate the score based on views, comments, and likes
df['score'] = videos['view_count'] + videos['comment_count'] + videos['like_count']

# Sort by the score in descending order to get the top trending videos
top_trending_videos = videos.sort_values(by='score', ascending=False)

TypeError: Invalid comparison between dtype=datetime64[ns, UTC] and datetime

In [74]:
videos['published_at'] = pd.to_datetime(videos['published_at'], utc=True)

start_date = pd.to_datetime('2022-03-01', utc=True)
end_date = pd.to_datetime('2022-03-15', utc=True)

mask = (videos['published_at'] >= start_date) & (videos['published_at'] <= end_date)
trending_videos = videos.loc[mask].sort_values(by=['view_count', 'like_count', 'comment_count'], ascending=False)

In [76]:
trending_videos['title']

979         Double Jay - Mafaranga (Lyric Video)
141            Natacha - LAMOTO (Official Video)
1040     B FACE - SORRY   (Official Music Video)
4          Vania Ice - Sinshaka (Official video)
19        EL PRO  - UMUKENYEZI ( Official Video)
557     Esther Nish-MAWE #[Maman] Official audio
Name: title, dtype: object

In [84]:
# Get the date range for the past 2 weeks
end_date = pd.to_datetime(datetime.now(), utc=True)
start_date = pd.to_datetime(end_date - timedelta(days=14), utc=True)

# # Filter the data for the past 2 weeks
df_scores = videos[(videos['published_at'] >= start_date) & (videos['published_at'] <= end_date)]


df_scores['score'] = df_scores['view_count'] + df_scores['comment_count'] + df_scores['like_count']

# # Sort the data by score in descending order and get the top 10
top_10 = df_scores.sort_values(by='score', ascending=False).head(10)





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [85]:
top_10

Unnamed: 0,video_id,title,channel_id,description,tags,published_at,duration,view_count,like_count,comment_count,categories,duration_seconds,producers,directors,score
0,2GJKDs7Xhms,"Vania Ice - Narahezagiwe ft B face, Dj Paulin ...",UCkyBtxQh0H5yOU5RjtHNr-w,"When Dancehall meets Gospel in Burundi, this t...","['bface', 'vaniaice', 'burundi', 'djpaulin', '...",2023-03-09 18:00:07+00:00,PT2M51S,50952,4394,347,"['Christian Music', 'Electronic Music', 'Hip H...",171.0,[],[],55693
914,7h-qH8aLlzg,Meili- KURE,UCbZrCOhe0bL-voF-XZXjNww,#meili #kure #burundi\nThe Key entertainment p...,[],2023-03-06 17:00:08+00:00,PT3M2S,51927,2789,440,"['Music', 'Music Of Asia', 'Pop Music', 'Rhyth...",182.0,[],[],55156
329,YbsDeD7gcy0,Alvin Smith - For You (Official Music Video),UC4p-gO6XjwnP73PvOSCKwaQ,#alvinsmith #foryou #2023 \n\nVideo director :...,['@youtube @music @2023'],2023-03-16 09:00:09+00:00,PT2M21S,25727,3525,448,"['Hip Hop Music', 'Music', 'Pop Music']",141.0,[],[],29700
654,-WKzUUbpOs4,Mb Data - Nkirigita [Visualizer],UC7TaNWr6GltNDVP8klOwmBw,Now available on All theplatforms: https://rel...,[],2023-03-07 17:10:12+00:00,PT2M35S,21378,2447,356,"['Christian Music', 'Hip Hop Music', 'Music', ...",155.0,[],[],24181
333,ME3pP_c5g1M,Alvin Smith - Urugo Ruhire ft Double Jay (Offi...,UC4p-gO6XjwnP73PvOSCKwaQ,,[],2023-03-13 20:03:34+00:00,PT3M38S,12963,1946,222,"['Christian Music', 'Hip Hop Music', 'Music', ...",218.0,[],[],15131
334,J-qQiFzM8BU,Alvin Smith - Jegeza Ingidi ft B-Face (Officia...,UC4p-gO6XjwnP73PvOSCKwaQ,-#alvinsmith #foryouep #bface,[],2023-03-13 20:03:21+00:00,PT4M6S,9006,1574,201,"['Hip Hop Music', 'Music']",246.0,[],[],10781
332,kXRkEUzgqFo,Alvin Smith - Ndakadame (Official Audio),UC4p-gO6XjwnP73PvOSCKwaQ,#alvinsmith #foryouep,[],2023-03-13 20:03:44+00:00,PT2M57S,6214,936,129,"['Electronic Music', 'Hip Hop Music', 'Music',...",177.0,[],[],7279
331,EE8IU9zbjYA,Alvin Smith - For You (Official Audio),UC4p-gO6XjwnP73PvOSCKwaQ,#alvinsmith #foryouep,[],2023-03-13 20:04:05+00:00,PT2M21S,5398,895,125,"['Music', 'Pop Music']",141.0,[],[],6418
330,AZmeMcYqtd4,Alvin Smith - Distance ft Chris MB & Endo Mike...,UC4p-gO6XjwnP73PvOSCKwaQ,,[],2023-03-14 13:32:11+00:00,PT3M40S,3897,557,67,"['Hip Hop Music', 'Music', 'Pop Music']",220.0,[],[],4521
335,0EaP8hONseg,Alvin Smith - Please ft Banda (Official Audio),UC4p-gO6XjwnP73PvOSCKwaQ,#alvinsmith #foryouep #banda,[],2023-03-13 20:03:09+00:00,PT3M1S,3419,532,83,['Music'],181.0,[],[],4034


In [86]:
now = datetime.utcnow()
start_of_month = pd.to_datetime(datetime(now.year, now.month, 1), utc=True)
end_of_month = pd.to_datetime(start_of_month + timedelta(days=31), utc=True)

# Filter the dataset to include only videos published this month
mask = (videos['published_at'] >= start_of_month) & (videos['published_at'] < end_of_month)
df_month = videos.loc[mask]

# Calculate a weighted score for each video based on views, likes, and comments
df_month['score'] = df_month['view_count'] + df_month['like_count'] + df_month['comment_count']

# Sort the dataset by score in descending order and get the top 10 videos
top_10 = df_month.sort_values('score', ascending=False).head(10)
top_10



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,video_id,title,channel_id,description,tags,published_at,duration,view_count,like_count,comment_count,categories,duration_seconds,producers,directors,score
321,qWSKrMJnTWk,Drama T - ITAMPORIZE (Official Music Video),UCz43BLa4tq0JvitUi6yAUNA,Director: @john.elarts\nProducer: @joellhought...,[],2023-03-03 16:00:08+00:00,PT4M29S,671430,31713,3862,"['Hip Hop Music', 'Independent Music', 'Music'...",269.0,[],[],707005
957,oJ2DGoRXDKQ,D-ONE - Amarira (Official Video),UC5RZCpNS9GY1M9_4vABle4w,Video Production: BAHEZA PICTURES \nDirector :...,[],2023-03-05 04:00:08+00:00,PT4M25S,148865,11675,1617,"['Christian Music', 'Music']",265.0,[],[[':']],162157
0,2GJKDs7Xhms,"Vania Ice - Narahezagiwe ft B face, Dj Paulin ...",UCkyBtxQh0H5yOU5RjtHNr-w,"When Dancehall meets Gospel in Burundi, this t...","['bface', 'vaniaice', 'burundi', 'djpaulin', '...",2023-03-09 18:00:07+00:00,PT2M51S,50952,4394,347,"['Christian Music', 'Electronic Music', 'Hip H...",171.0,[],[],55693
914,7h-qH8aLlzg,Meili- KURE,UCbZrCOhe0bL-voF-XZXjNww,#meili #kure #burundi\nThe Key entertainment p...,[],2023-03-06 17:00:08+00:00,PT3M2S,51927,2789,440,"['Music', 'Music Of Asia', 'Pop Music', 'Rhyth...",182.0,[],[],55156
329,YbsDeD7gcy0,Alvin Smith - For You (Official Music Video),UC4p-gO6XjwnP73PvOSCKwaQ,#alvinsmith #foryou #2023 \n\nVideo director :...,['@youtube @music @2023'],2023-03-16 09:00:09+00:00,PT2M21S,25727,3525,448,"['Hip Hop Music', 'Music', 'Pop Music']",141.0,[],[],29700
654,-WKzUUbpOs4,Mb Data - Nkirigita [Visualizer],UC7TaNWr6GltNDVP8klOwmBw,Now available on All theplatforms: https://rel...,[],2023-03-07 17:10:12+00:00,PT2M35S,21378,2447,356,"['Christian Music', 'Hip Hop Music', 'Music', ...",155.0,[],[],24181
333,ME3pP_c5g1M,Alvin Smith - Urugo Ruhire ft Double Jay (Offi...,UC4p-gO6XjwnP73PvOSCKwaQ,,[],2023-03-13 20:03:34+00:00,PT3M38S,12963,1946,222,"['Christian Music', 'Hip Hop Music', 'Music', ...",218.0,[],[],15131
334,J-qQiFzM8BU,Alvin Smith - Jegeza Ingidi ft B-Face (Officia...,UC4p-gO6XjwnP73PvOSCKwaQ,-#alvinsmith #foryouep #bface,[],2023-03-13 20:03:21+00:00,PT4M6S,9006,1574,201,"['Hip Hop Music', 'Music']",246.0,[],[],10781
332,kXRkEUzgqFo,Alvin Smith - Ndakadame (Official Audio),UC4p-gO6XjwnP73PvOSCKwaQ,#alvinsmith #foryouep,[],2023-03-13 20:03:44+00:00,PT2M57S,6214,936,129,"['Electronic Music', 'Hip Hop Music', 'Music',...",177.0,[],[],7279
331,EE8IU9zbjYA,Alvin Smith - For You (Official Audio),UC4p-gO6XjwnP73PvOSCKwaQ,#alvinsmith #foryouep,[],2023-03-13 20:04:05+00:00,PT2M21S,5398,895,125,"['Music', 'Pop Music']",141.0,[],[],6418


In [87]:
videos.columns

Index(['video_id', 'title', 'channel_id', 'description', 'tags',
       'published_at', 'duration', 'view_count', 'like_count', 'comment_count',
       'categories', 'duration_seconds', 'producers', 'directors'],
      dtype='object')

In [94]:
df_year =videos.copy()
df_year['year'] = videos['published_at'].dt.year

# Step 2
top_videos = df_year.groupby('year').idxmax()[['view_count', 'like_count', 'comment_count']]

# Step 3
top_songs = df_year.loc[top_videos['view_count']].reset_index(drop=True)


The default value of numeric_only in DataFrameGroupBy.idxmax is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.



In [91]:
df_year

Unnamed: 0,year
0,2023
1,2022
2,2022
3,2022
4,2022
...,...
1183,2016
1184,2016
1185,2016
1186,2016


In [95]:
top_songs

Unnamed: 0,video_id,title,channel_id,description,tags,published_at,duration,view_count,like_count,comment_count,categories,duration_seconds,producers,directors,year
0,pret6HYuiCk,Sat-B - Nyandika (Official Video),UCtYLotDCyNHvwjnEiTIr7iA,Nyandika Mugitabo LEVEL UP with Vusi Nova http...,"['Sat-B', 'Nyandika', 'Kigali', 'Rwanda', 'Bur...",2014-09-24 23:33:21+00:00,PT4M2S,143559,2819,134,"['Hip Hop Music', 'Music', 'Pop Music']",242.0,[],[],2014
1,c-LRdU3dP2E,Burundi Bwacu by NATACHA BURUNDI (Official Video),UCkKTwGg9aVGJw-Y21hFG5Qg,Song by Natacha \nVideo Direction & Editing by...,"['Burundi Bwacu', 'Burundi Bwacu natacha', 'na...",2015-05-29 09:07:08+00:00,PT4M1S,146158,834,53,"['Christian Music', 'Music']",241.0,[],[],2015
2,kqy7gRKDKLQ,Sat B - Nyampinga (Official Music Video),UCtYLotDCyNHvwjnEiTIr7iA,The word NYAMPINGA means MISS.... This is an i...,[],2016-01-24 12:53:00+00:00,PT4M54S,473002,5774,171,"['Christian Music', 'Hip Hop Music', 'Music', ...",294.0,[],[],2016
3,WFPWpqWPFuI,Mt number one - yasa n'irirenga,UCK8Dj51vx2iLG42Xid5zqXA,Music video for yasa n'irirenga performed by M...,"['vydia', 'music', 'music video', ""yasa n'irir...",2017-04-25 13:27:42+00:00,PT4M56S,1509269,10502,451,"['Christian Music', 'Hip Hop Music', 'Music', ...",296.0,[],[],2017
4,Cp42frdwBh4,Sat-B - No Love (Official Music Video),UCtYLotDCyNHvwjnEiTIr7iA,"The wait is over now, Burundian artist SAT-B p...",[],2018-07-26 06:52:16+00:00,PT3M31S,1232437,16572,1289,"['Hip Hop Music', 'Music', 'Pop Music']",211.0,[],[],2018
5,TCD8QjZOcEE,BANTUBWOY - SUGUA (OFFICIAL VIDEO) ft. BIG FIZ...,UCtSpWVsMMl9eK5sGOzcUAtA,#Bigfizzo (https://www.youtube.com/results?sea...,"['BANTUBWOY', 'SUGUA', '(OFFICIAL', 'VIDEO)', ...",2019-08-31 13:50:09+00:00,PT3M55S,657975,11232,1314,"['Hip Hop Music', 'Music', 'Pop Music']",235.0,[[': X-Fecta x Peniel Pro Audio&Video by']],[],2019
6,q1_lKXtF0rU,Sat-B - Beautiful ft Meddy (Official Lyrics Vi...,UCtYLotDCyNHvwjnEiTIr7iA,Sat-B presents the official lyrics visualizer ...,"['Sat-B', 'Meddy', 'Beautiful', 'Empire Avenue...",2020-12-14 16:41:20+00:00,PT3M44S,4235761,39944,2625,"['Christian Music', 'Hip Hop Music', 'Music', ...",224.0,[],[],2020
7,SBupxUQTBtM,Trey Zo & Rappy Boy - Yooh Remix ft Double Jay...,UCMrF7OdgLb18wjk8U-wB5sg,Audio Produced by X-Fecta\nVideo Directed by J...,[],2021-11-27 17:16:09+00:00,PT3M37S,1531170,18568,905,"['Hip Hop Music', 'Music', 'Pop Music']",217.0,[],[],2021
8,UgwkUd4zBlE,Drama T - MADAMU (Official Music Video),UCz43BLa4tq0JvitUi6yAUNA,Director: @john.elarts\nProducer: @joellhought...,[],2022-08-05 10:00:12+00:00,PT4M36S,4113178,56238,3280,"['Hip Hop Music', 'Music', 'Pop Music']",276.0,[],[],2022
9,qWSKrMJnTWk,Drama T - ITAMPORIZE (Official Music Video),UCz43BLa4tq0JvitUi6yAUNA,Director: @john.elarts\nProducer: @joellhought...,[],2023-03-03 16:00:08+00:00,PT4M29S,671430,31713,3862,"['Hip Hop Music', 'Independent Music', 'Music'...",269.0,[],[],2023


In [96]:
videos.columns

Index(['video_id', 'title', 'channel_id', 'description', 'tags',
       'published_at', 'duration', 'view_count', 'like_count', 'comment_count',
       'categories', 'duration_seconds', 'producers', 'directors'],
      dtype='object')

In [111]:
df = videos.copy()


df['published_at'] = pd.to_datetime(df['published_at'])

# extract year from published_at
grouped = df.groupby(['channel_id', pd.Grouper(key='published_at', freq='D')])['view_count'].sum().reset_index()

# merge channel titles
grouped = grouped.merge(df[['channel_id', 'title']].drop_duplicates(), on='channel_id')

# create a line chart
fig = go.Figure()
for title in grouped['title'].unique():
    channel_data = grouped[grouped['title'] == title]
    fig.add_trace(go.Scatter(x=channel_data['published_at'], y=channel_data['view_count'], name=title))

# set the axis labels
fig.update_layout(
    xaxis_title='Published Date',
    yaxis_title='View Count'
)

# show the chart
fig.show()

[{'label': 'UC4p-gO6XjwnP73PvOSCKwaQ', 'value': 'UC4p-gO6XjwnP73PvOSCKwaQ'},
 {'label': 'UC5AplB8THLND3Dm3V_8c6RA', 'value': 'UC5AplB8THLND3Dm3V_8c6RA'},
 {'label': 'UC5RZCpNS9GY1M9_4vABle4w', 'value': 'UC5RZCpNS9GY1M9_4vABle4w'},
 {'label': 'UC7TaNWr6GltNDVP8klOwmBw', 'value': 'UC7TaNWr6GltNDVP8klOwmBw'},
 {'label': 'UC8CCClnyP52IVAtYXVHBvvw', 'value': 'UC8CCClnyP52IVAtYXVHBvvw'},
 {'label': 'UC9E7m6sVh7qP3uy_i4rFXlQ', 'value': 'UC9E7m6sVh7qP3uy_i4rFXlQ'},
 {'label': 'UCAHRby1-eD2Zxzp0ltrxjsw', 'value': 'UCAHRby1-eD2Zxzp0ltrxjsw'},
 {'label': 'UCD6q_wl-ZQACvCMvn7o46Cw', 'value': 'UCD6q_wl-ZQACvCMvn7o46Cw'},
 {'label': 'UCGsE-DkfHgcjm-erxuneEhQ', 'value': 'UCGsE-DkfHgcjm-erxuneEhQ'},
 {'label': 'UCK8Dj51vx2iLG42Xid5zqXA', 'value': 'UCK8Dj51vx2iLG42Xid5zqXA'},
 {'label': 'UCLP7iLOB-VJ0-G_xhmYlupw', 'value': 'UCLP7iLOB-VJ0-G_xhmYlupw'},
 {'label': 'UCMrF7OdgLb18wjk8U-wB5sg', 'value': 'UCMrF7OdgLb18wjk8U-wB5sg'},
 {'label': 'UCNzcpb75Oeq9hs5sMX8ThTQ', 'value': 'UCNzcpb75Oeq9hs5sMX8ThTQ'},

In [112]:
channels.columns

Index(['channel_id', 'title', 'description', 'published_at', 'uploads_id',
       'view_count', 'subscriber_count', 'video_count'],
      dtype='object')

In [119]:
videos.head()

Unnamed: 0,video_id,title,channel_id,description,tags,published_at,duration,view_count,like_count,comment_count,categories,duration_seconds,producers,directors
0,2GJKDs7Xhms,"Vania Ice - Narahezagiwe ft B face, Dj Paulin ...",UCkyBtxQh0H5yOU5RjtHNr-w,"When Dancehall meets Gospel in Burundi, this t...","['bface', 'vaniaice', 'burundi', 'djpaulin', '...",2023-03-09 18:00:07+00:00,PT2M51S,50952,4394,347,"['Christian Music', 'Electronic Music', 'Hip H...",171.0,[],[]
1,RCJoF2qAKJc,Vania Ice - I Dey feat. Kivumbi,UCkyBtxQh0H5yOU5RjtHNr-w,Artist: Vania Ice ft. Kivumbi \nDirected by Me...,[],2022-11-25 16:27:23+00:00,PT3M10S,66071,3700,246,"['Hip Hop Music', 'Music', 'Pop Music']",190.0,[],[]
2,QOFonQE-Eo0,Vania Ice - FOREVER (Official Lyric Video),UCkyBtxQh0H5yOU5RjtHNr-w,#Vania #Ice #Forever\nStream Forever: https://...,"['vanilla ice', 'vania', 'ice', 'forever', 'we...",2022-06-24 16:48:33+00:00,PT3M53S,46624,2339,197,"['Christian Music', 'Music', 'Pop Music']",233.0,[],[]
3,ksmmUkDQgvA,Vania Ice - Contigo (Official video),UCkyBtxQh0H5yOU5RjtHNr-w,#Vania #Ice #Contigo\nStream Contigo: https://...,"['#summertime', '#latina', 'vania ice', 'kina ...",2022-05-19 15:57:26+00:00,PT3M28S,71902,2698,274,"['Music', 'Music Of Latin America', 'Pop Music']",208.0,[['I']],[]
4,gGRSsPVL8f4,Vania Ice - Sinshaka (Official video),UCkyBtxQh0H5yOU5RjtHNr-w,Music Video by Vania Ice performing Sinshaka.\...,"['burundi', 'vania', 'love songs', 'cheating',...",2022-03-04 08:31:35+00:00,PT3M54S,64942,2569,247,"['Music', 'Pop Music']",234.0,[],[]
