In [None]:
!pip install -q google-api-python-client

In [None]:
import os
from googleapiclient.discovery import build

# Set up the YouTube API client
#
# The API key and channel ID are read from the environment variables
# `api_key` and `channel_id` when they are set, so a real key never has to be
# saved inside the notebook. The literals below are only fallbacks.
#
# Optional: to load those variables from a .env file, install python-dotenv
# and uncomment the next two lines (they must run before os.getenv below).
# from dotenv import load_dotenv
# load_dotenv()

# Removed my API key, will have to create and use your own
api_key = os.getenv('api_key') or '**insert your API key here**'

# Define the channel ID
# This is for single use ID, if you want multiple IDs
# then you will have to modify a couple lines
channel_id = os.getenv('channel_id') or 'UCnaP100kTBB_WGM9IiF73yw'

# Build the client last, once api_key has its final value; building it before
# the environment lookup would leave the client holding the placeholder key.
youtube = build('youtube', 'v3', developerKey=api_key)

In [None]:
def get_channel_stats(youtube, channel_id):
    """Return the raw `channels.list` response for one channel.

    Args:
        youtube: API client exposing `channels().list(...).execute()`.
        channel_id: ID of the channel to look up.

    Returns:
        The response of the executed request, unmodified. The request asks
        for the `snippet`, `contentDetails` and `statistics` parts.
    """
    requested_parts = 'snippet,contentDetails,statistics'
    return youtube.channels().list(part=requested_parts, id=channel_id).execute()

# Query the API once and keep the raw response; the bare name on the last
# line makes the notebook display it.
channel_stats = get_channel_stats(youtube=youtube, channel_id=channel_id)
channel_stats

In [None]:
def get_video_ids(youtube, channel_id):
    """Return the IDs of every video in a channel's uploads playlist.

    The uploads playlist is looked up from `channel_id` inside the function,
    so the result depends only on the arguments and not on any notebook
    variable computed in another cell.

    Args:
        youtube: API client exposing `channels()` and `playlistItems()`.
        channel_id: ID of the channel whose uploads are listed.

    Returns:
        A list of video ID strings, in the order the playlist pages return
        them.

    Raises:
        ValueError: if the channel lookup returns no items for `channel_id`.
    """
    channel_response = youtube.channels().list(
        part='contentDetails',
        id=channel_id
    ).execute()
    channels = channel_response.get('items') or []
    if not channels:
        raise ValueError(f'No channel found for channel_id={channel_id!r}')
    uploads_playlist_id = channels[0]['contentDetails']['relatedPlaylists']['uploads']

    video_ids = []
    request = youtube.playlistItems().list(
        playlistId=uploads_playlist_id,
        part='contentDetails',
        maxResults=50
    )
    # list_next returns None once there is no further page, ending the loop.
    while request is not None:
        response = request.execute()
        video_ids.extend(
            item['contentDetails']['videoId'] for item in response.get('items', [])
        )
        request = youtube.playlistItems().list_next(request, response)
    return video_ids

# Collect the channel's video IDs and show how many were found.
video_ids = get_video_ids(youtube=youtube, channel_id=channel_id)
len(video_ids)

In [None]:
def get_video_details(youtube, video_ids, batch_size=50):
    """Fetch the `snippet` and `statistics` parts for a list of videos.

    IDs are sent in comma-separated batches instead of one request per
    video, which cuts the number of API calls by up to `batch_size` times.

    Args:
        youtube: API client exposing `videos().list(...).execute()`.
        video_ids: iterable of video ID strings.
        batch_size: number of IDs per request, between 1 and 50 (50 is the
            documented maximum for the `id` filter of `videos.list`).

    Returns:
        A list of video resources in the same order as `video_ids`. IDs the
        API returns nothing for (e.g. deleted or private videos) are
        omitted, as they were with one request per video.

    Raises:
        ValueError: if `batch_size` is outside 1..50.
    """
    if not 1 <= batch_size <= 50:
        raise ValueError('batch_size must be between 1 and 50')

    video_ids = list(video_ids)
    video_details = []
    for start in range(0, len(video_ids), batch_size):
        batch = video_ids[start:start + batch_size]
        response = youtube.videos().list(
            part='snippet,statistics',
            id=','.join(batch)
        ).execute()
        # Index by ID so the output order follows the request order, whatever
        # order the response lists the items in.
        items_by_id = {item['id']: item for item in response.get('items', [])}
        video_details.extend(
            items_by_id[video_id] for video_id in batch if video_id in items_by_id
        )
    return video_details

# Download the per-video resources and show how many came back.
video_details = get_video_details(youtube=youtube, video_ids=video_ids)
len(video_details)

In [None]:
import pandas as pd

def videos_to_df(video_details):
    """Convert video resources into a flat DataFrame, one row per video.

    Args:
        video_details: iterable of dicts, each with a `snippet` dict holding
            `title` and `publishedAt`, and optionally a `statistics` dict.

    Returns:
        A DataFrame with columns `title`, `publishedAt`, `views`, `likes`,
        `dislikes` and `comments`. Counts missing from `statistics` become 0.
        The columns are present even when `video_details` is empty, so
        later column lookups do not fail on an empty channel.
    """
    columns = ['title', 'publishedAt', 'views', 'likes', 'dislikes', 'comments']
    videos = []
    for video in video_details:
        snippet = video['snippet']
        # Treat a resource without a statistics part like one with no counts.
        stats = video.get('statistics', {})
        videos.append({
            'title': snippet['title'],
            'publishedAt': snippet['publishedAt'],
            'views': int(stats.get('viewCount', 0)),
            'likes': int(stats.get('likeCount', 0)),
            # NOTE(review): the public API reportedly stopped returning
            # dislikeCount in Dec 2021, so this is likely always 0 - confirm.
            'dislikes': int(stats.get('dislikeCount', 0)),
            'comments': int(stats.get('commentCount', 0)),
        })
    return pd.DataFrame(videos, columns=columns)

# Build the working table (one row per video) and display it.
df_videos = videos_to_df(video_details=video_details)
df_videos

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Global look for every figure in the notebook: seaborn's darkgrid settings
# first, then matplotlib's dark_background style applied on top.
sns.set(style='darkgrid')
plt.style.use('dark_background')

# Histogram (with KDE curve) of per-video view counts
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df_videos['views'], kde=True, color='blue', ax=ax)
ax.set(title='Distribution of Video Views', xlabel='Views', ylabel='Frequency')
plt.show()

In [None]:
# Parse the publication timestamps so they can serve as a time axis
# (this overwrites the original string column in df_videos).
df_videos['publishedAt'] = pd.to_datetime(df_videos['publishedAt'])

# One line per metric, drawn on a shared axis
fig, ax = plt.subplots(figsize=(12, 6))
for metric, label, color in (('likes', 'Likes', 'blue'),
                             ('comments', 'Comments', 'green')):
    sns.lineplot(data=df_videos, x='publishedAt', y=metric,
                 label=label, color=color, ax=ax)
ax.set(title='Likes and Comments Over Time', xlabel='Time', ylabel='Count')
ax.legend()
plt.show()

In [None]:
# Count uploads per calendar year (requires 'publishedAt' to already be a
# datetime column).
df_videos['year'] = df_videos['publishedAt'].dt.year
video_count_by_year = df_videos.groupby('year').size()

# Bar chart: one bar per year
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x=video_count_by_year.index,
    y=video_count_by_year.values,
    palette='Blues_r',
    ax=ax,
)
ax.set(title='Number of Videos Uploaded Each Year',
       xlabel='Year', ylabel='Number of Videos')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Ten highest-ranked videos per metric; each table keeps only the title and
# the metric it is ranked by.
top_10_likes = df_videos[['title', 'likes']].nlargest(10, 'likes')
top_10_comments = df_videos[['title', 'comments']].nlargest(10, 'comments')
top_10_views = df_videos[['title', 'views']].nlargest(10, 'views')

# The trailing tuple makes the notebook display all three tables.
top_10_likes, top_10_comments, top_10_views

In [None]:
# Horizontal bar chart of the ten most-liked videos
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=top_10_likes, x='likes', y='title', palette='Blues_r', ax=ax)
ax.set(title='Top 10 Videos by Likes', xlabel='Likes', ylabel='Video Title')
plt.show()

In [None]:
# Horizontal bar chart of the ten most-commented videos
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=top_10_comments, x='comments', y='title', palette='Blues_r', ax=ax)
ax.set(title='Top 10 Videos by Comments', xlabel='Comments', ylabel='Video Title')
plt.show()

In [None]:
# Horizontal bar chart of the ten most-viewed videos
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=top_10_views, x='views', y='title', palette='Blues_r', ax=ax)
ax.set(title='Top 10 Videos by Views', xlabel='Views', ylabel='Video Title')
plt.show()

In [None]:
# Analyzing the month with the most video uploads
df_videos['month'] = df_videos['publishedAt'].dt.month

# Reindex onto all twelve calendar months so that a month without uploads
# becomes a zero-height bar. Without this the chart has fewer than twelve
# bars and the fixed Jan..Dec tick labels below land on the wrong bars.
video_count_by_month = (
    df_videos.groupby('month').size().reindex(range(1, 13), fill_value=0)
)

# Plotting number of videos uploaded each month
plt.figure(figsize=(10, 6))
sns.barplot(x=video_count_by_month.index, y=video_count_by_month.values, palette='Blues_r')
plt.title('Number of Videos Uploaded Each Month')
plt.xlabel('Month')
plt.ylabel('Number of Videos')
# Bars sit at positions 0..11, one per calendar month in order.
plt.xticks(range(0, 12), ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
plt.show()

In [None]:
# Label every video with its 'YYYY-MM' publication month
df_videos['year_month'] = df_videos['publishedAt'].dt.strftime('%Y-%m')

# Number of uploads per year-month, as a two-column frame
# ('year_month', 'count')
video_count_by_year_month = (
    df_videos
    .groupby('year_month')
    .size()
    .reset_index(name='count')
)

# Line chart of uploads per year-month
fig, ax = plt.subplots(figsize=(15, 8))
sns.lineplot(data=video_count_by_year_month, x='year_month', y='count', ax=ax)
ax.set(title='Video Uploads Per Month Over the Years',
       xlabel='Year-Month', ylabel='Number of Videos')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Same per-year-month counts as a scatterplot, with the grid switched off.
# Expects video_count_by_year_month to have 'year_month' and 'count' columns.
fig, ax = plt.subplots(figsize=(15, 8))
sns.scatterplot(data=video_count_by_year_month, x='year_month', y='count', ax=ax)
ax.set(title='Video Uploads Per Month Over the Years (Scatterplot)',
       xlabel='Year-Month', ylabel='Number of Videos')
ax.tick_params(axis='x', labelrotation=45)
ax.grid(False)
plt.show()

In [None]:
# Adjusting the scatterplot with larger dots
df_videos['year'] = df_videos['publishedAt'].dt.year
df_videos['month'] = df_videos['publishedAt'].dt.month

# Grouping data by year and month
# (this rebinds video_count_by_year_month to a frame with 'year', 'month'
# and 'count' columns)
video_count_by_year_month = df_videos.groupby(['year', 'month']).size().reset_index(name='count')

# Colour by month *name* and let seaborn build the legend from the data.
# Handing a bare list of labels to plt.legend() pairs names with artists by
# position only, which mislabels the entries (and breaks whenever a month
# has no uploads).
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
month_name_by_number = dict(enumerate(month_labels, start=1))
video_count_by_year_month['Month'] = video_count_by_year_month['month'].map(month_name_by_number)
# Legend order: calendar order, restricted to months that actually occur.
months_present = [
    month_name_by_number[number]
    for number in sorted(video_count_by_year_month['month'].unique())
]

# Plotting with year bins, color by month, and larger dots
# NOTE(review): the 'bright' palette appears to have 10 colours, so with more
# than ten months some colours repeat - consider a 12-colour palette.
plt.figure(figsize=(15, 8))
sns.scatterplot(
    data=video_count_by_year_month,
    x='year',
    y='count',
    hue='Month',  # the column name doubles as the legend title
    hue_order=months_present,
    palette='bright',
    s=100,  # Increased dot size
)
plt.title('Video Uploads Per Year with Monthly Color Coding')
plt.xlabel('Year')
plt.ylabel('Number of Videos')
plt.xticks(video_count_by_year_month['year'].unique())
plt.show()

In [None]:
!pip install wordcloud

In [None]:
from wordcloud import WordCloud

# Concatenate all titles into one text and render it as a word cloud
all_titles = ' '.join(df_videos['title'])
wordcloud = WordCloud(
    width=800,
    height=800,
    background_color='black',
    min_font_size=10,
)
wordcloud = wordcloud.generate(all_titles)

# Show the rendered cloud as an image, without axes or padding
fig, ax = plt.subplots(figsize=(8, 8), facecolor=None)
ax.imshow(wordcloud)
ax.axis('off')
fig.tight_layout(pad=0)

plt.show()

In [None]:
# Calculating Engagement Ratios
# Videos with zero recorded views are masked to NaN before dividing, so the
# ratios never become inf and those rows are left out of the histograms.
views_nonzero = df_videos['views'].where(df_videos['views'] > 0)
df_videos['like_ratio'] = df_videos['likes'] / views_nonzero
df_videos['comment_ratio'] = df_videos['comments'] / views_nonzero
# Kept as a column for completeness but not plotted.
# NOTE(review): the public API reportedly no longer returns dislike counts,
# so this ratio is expected to be all zeros - confirm.
df_videos['dislike_ratio'] = df_videos['dislikes'] / views_nonzero

# Plotting Engagement Ratios
# Two panels for the two ratios that are drawn (the previous 1x3 grid left
# its third slot empty).
fig, (ax_likes, ax_comments) = plt.subplots(1, 2, figsize=(10, 5))

sns.histplot(df_videos['like_ratio'], kde=True, color='blue', ax=ax_likes)
ax_likes.set_title('Like Ratio Distribution')
ax_likes.set_xlabel('Like Ratio')
ax_likes.set_ylabel('Frequency')

sns.histplot(df_videos['comment_ratio'], kde=True, color='green', ax=ax_comments)
ax_comments.set_title('Comment Ratio Distribution')
ax_comments.set_xlabel('Comment Ratio')
ax_comments.set_ylabel('Frequency')

fig.tight_layout()
plt.show()

In [None]:
# Trend Analysis for Likes, Comments, and Views with tilted X-axis labels
df_videos['year'] = df_videos['publishedAt'].dt.year

# Calculating yearly averages
yearly_avg = df_videos.groupby('year').agg({'views':'mean', 'likes':'mean', 'comments':'mean'}).reset_index()

# One panel per metric: (column, line colour, title, y-axis label).
# Drawing the panels in a loop keeps them identical in everything except
# these four values, and gives every panel an explicit y-axis label.
trend_panels = [
    ('views', 'blue', 'Yearly Average Views', 'Average Views'),
    ('likes', 'green', 'Yearly Average Likes', 'Average Likes'),
    ('comments', 'red', 'Yearly Average Comments', 'Average Comments'),
]
year_ticks = yearly_avg['year'].unique()

# Plotting Trend Analysis with tilted X-axis labels
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for ax, (metric, color, title, ylabel) in zip(axes, trend_panels):
    sns.lineplot(data=yearly_avg, x='year', y=metric, color=color, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Year')
    ax.set_ylabel(ylabel)
    # One tick per year present in the data, tilted for readability
    ax.set_xticks(year_ticks)
    ax.tick_params(axis='x', labelrotation=45)
    ax.grid(axis='y', linestyle='-')
    ax.grid(axis='x', linestyle=':')

fig.tight_layout()
plt.show()

In [None]:
def get_video_durations(youtube, video_ids, batch_size=50):
    """Fetch the `contentDetails.duration` string of each video.

    IDs are sent in comma-separated batches instead of one request per
    video.

    Args:
        youtube: API client exposing `videos().list(...).execute()`.
        video_ids: iterable of video ID strings.
        batch_size: number of IDs per request, between 1 and 50 (50 is the
            documented maximum for the `id` filter of `videos.list`).

    Returns:
        A list with exactly one entry per element of `video_ids`, in the same
        order. Each entry is the duration string from the response, or None
        when the API returned no item for that ID (previously such an ID
        raised IndexError).

    Raises:
        ValueError: if `batch_size` is outside 1..50.
    """
    if not 1 <= batch_size <= 50:
        raise ValueError('batch_size must be between 1 and 50')

    video_ids = list(video_ids)
    video_durations = []
    for start in range(0, len(video_ids), batch_size):
        batch = video_ids[start:start + batch_size]
        response = youtube.videos().list(
            part='contentDetails',
            id=','.join(batch)
        ).execute()
        # Look durations up by ID so the output lines up with video_ids even
        # if the response omits or reorders items.
        duration_by_id = {
            item['id']: item['contentDetails']['duration']
            for item in response.get('items', [])
        }
        video_durations.extend(duration_by_id.get(video_id) for video_id in batch)
    return video_durations

video_durations = get_video_durations(youtube, video_ids)

# Attach durations by video ID instead of by position. df_videos has one row
# per entry of video_details (in the same order), which is not guaranteed to
# match video_ids one-to-one, so a positional assignment could fail or pair a
# duration with the wrong video.
duration_by_id = dict(zip(video_ids, video_durations))
df_videos['duration'] = [duration_by_id.get(video['id']) for video in video_details]
df_videos.head()

In [None]:
!pip install isodate

In [None]:
import isodate

# Turn each duration string into a number of seconds
df_videos['duration_sec'] = df_videos['duration'].map(
    lambda raw_duration: isodate.parse_duration(raw_duration).total_seconds()
)

# Pairwise correlations between video length and the engagement counts
metric_columns = ['duration_sec', 'views', 'likes', 'comments']
correlation_data = df_videos[metric_columns].corr()

# Annotated heatmap of the correlation matrix
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(correlation_data, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()