## YouTube Channel Analyzer

In [50]:
# importing required library
import pandas as pd
import os
from datetime import datetime
from dotenv import load_dotenv
load_dotenv()

True

In [51]:
# reading given file
input_file_name = "youtube_channels_sample - Youtube.csv"
input_file_path = os.path.join(os.getcwd(),input_file_name)

# creating dataframe
df = pd.read_csv(input_file_path)

In [52]:
# visualizing given data
df.head()
# outcome: The given data is YouTube channel names

Unnamed: 0,Youtube Profiles
0,Spreading Gyan
1,We Make Creators
2,Manoj Dey
3,Skillsiya
4,Learn With Lokesh Lalwani


In [53]:
# checking for valid records
print(df.isna().sum())
print(df.isnull().sum())
print(df.shape)
# outcome: Given 20 channel names

Youtube Profiles    0
dtype: int64
Youtube Profiles    0
dtype: int64
(20, 1)


In [54]:
# converting dataframe to list
channel_names = df["Youtube Profiles"].to_list()
print(channel_names)

['Spreading Gyan', 'We Make Creators', 'Manoj Dey', 'Skillsiya', 'Learn With Lokesh Lalwani', 'Upcoming World\r', 'SJ SEARCHING', 'SOCIALEPFO', 'Sarkari DNA', 'WorkerVoice.in', 'Anurag Aggarwal: Business Coach', 'CA Rahul Malodia: Business Coach', 'Dr. Ujjwal Patni Business Coach', 'Biz Tak', 'CA Rachana Phadke Ranade', 'NDTV Profit', 'bekifaayati', 'Money9', 'Invest Aaj For Kal', 'Dear Sir']


In [None]:
# configurations for API (using YouTube Data API to get data)
# reading API KEY (MY_YOUTUBE_API_KEY) from .env file
import googleapiclient.discovery
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = os.getenv("MY_YOUTUBE_API_KEY")
youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey = DEVELOPER_KEY)

In [56]:
# function to get channel IDs by name
def get_channel_id(name):
    request = youtube.search().list(
        part="snippet",
        maxResults=1,
        q = name,
        type="channel"
    )
    response = request.execute()
    return response["items"][0]["snippet"]["channelId"]

In [57]:
# making list of channel IDs 
channel_ids = [get_channel_id(name) for name in channel_names]

In [58]:
print(channel_ids)

['UCZDCqcWZVNyvTSsoc-chKJg', 'UCr6hHjMNvgQxErQ1ZJV4drw', 'UCT2xzagsBVtStUCA2ulvKmQ', 'UCsQ6_w5iUOUOMAUPclg2kcg', 'UCFAr3FQxRhSzVNOD3vq1gMQ', 'UC2gdXA4n6_yEHos7CyQk7rA', 'UC2zYoCWdj3qLtAspeirqWUQ', 'UClmVVcYuH0ZPm3qsPm1TQIg', 'UCtsetVYMkX8iPFS2oUOcXEw', 'UCfj7SPM25A3RQSYE1YFPyVQ', 'UCBM6AwaDl8uPVAN_BUIqPUw', 'UCPgfM-dk3XAb4T3DtT6Nwsw', 'UCqXCX2DnQZh8e4VNT7MPTtA', 'UCSWSOS6YXUbNMzTH-tV7Pfw', 'UCe3qdG0A_gr-sEdat5y2twQ', 'UC3uJIdRFTGgLWrUziaHbzrg', 'UCdvOCtR3a9ICLAw0DD3DpXg', 'UCdYV_l-cgsMRHZvp0zkmujg', 'UCWHCXSKASuSzao_pplQ7SPw', 'UC9dyrsYEmD4iGJ8Oz1G5dpw']


In [59]:
# function to get channel stats
def get_channel_stats(id):
    request = youtube.channels().list(
        part="snippet,statistics,contentDetails",
        id = id
    )
    response = request.execute()
    row = {
        "title": response["items"][0]["snippet"]["title"],
        "channelId": id,
        "publishedAt": response["items"][0]["snippet"]["publishedAt"],
        "viewCount": int(response["items"][0]["statistics"]["viewCount"]),
        "subscriberCount": int(response["items"][0]["statistics"]["subscriberCount"]),
        "videoCount": int(response["items"][0]["statistics"]["videoCount"]),
        "uploadsPlaylistId": response["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]
    }
    return row

In [60]:
# getting stats data and setting up
channel_data = { id: get_channel_stats(id) for id in channel_ids }
channel_data = { index: item for index,item in enumerate(channel_data.values()) }

In [61]:
print(channel_data)

{0: {'title': 'Spreading Gyan', 'channelId': 'UCZDCqcWZVNyvTSsoc-chKJg', 'publishedAt': '2019-11-13T18:35:26.321751Z', 'viewCount': 335350998, 'subscriberCount': 4480000, 'videoCount': 335, 'uploadsPlaylistId': 'UUZDCqcWZVNyvTSsoc-chKJg'}, 1: {'title': 'We Make Creators', 'channelId': 'UCr6hHjMNvgQxErQ1ZJV4drw', 'publishedAt': '2018-02-11T20:07:40Z', 'viewCount': 128358844, 'subscriberCount': 1130000, 'videoCount': 1413, 'uploadsPlaylistId': 'UUr6hHjMNvgQxErQ1ZJV4drw'}, 2: {'title': 'Manoj Dey', 'channelId': 'UCT2xzagsBVtStUCA2ulvKmQ', 'publishedAt': '2016-11-24T14:48:33Z', 'viewCount': 1001951068, 'subscriberCount': 7010000, 'videoCount': 1728, 'uploadsPlaylistId': 'UUT2xzagsBVtStUCA2ulvKmQ'}, 3: {'title': 'Skillsiya', 'channelId': 'UCsQ6_w5iUOUOMAUPclg2kcg', 'publishedAt': '2019-11-25T16:38:02.828954Z', 'viewCount': 69710221, 'subscriberCount': 970000, 'videoCount': 306, 'uploadsPlaylistId': 'UUsQ6_w5iUOUOMAUPclg2kcg'}, 4: {'title': 'Learn With Lokesh Lalwani', 'channelId': 'UCFAr3FQ

In [62]:
# function to get all videos IDs of each channel (limit 10, low due to free tier API)
# As we now to get better quality analysis we need to extract large number of data but due to free API limitations I set limit to 5 video/channel
def get_channel_videos_id(uploadsPlaylistId, limit=10):
    request = youtube.playlistItems().list(
        part="contentDetails",
        maxResults = limit,
        playlistId = uploadsPlaylistId
    )
    response = request.execute()
    return [ response["items"][i]["contentDetails"]["videoId"] for i in range(limit) ]

In [63]:
# getting list of all videos IDs
for v in channel_data.values():
    v["allVideoIds"] = get_channel_videos_id(v["uploadsPlaylistId"])

In [64]:
print(channel_data)

{0: {'title': 'Spreading Gyan', 'channelId': 'UCZDCqcWZVNyvTSsoc-chKJg', 'publishedAt': '2019-11-13T18:35:26.321751Z', 'viewCount': 335350998, 'subscriberCount': 4480000, 'videoCount': 335, 'uploadsPlaylistId': 'UUZDCqcWZVNyvTSsoc-chKJg', 'allVideoIds': ['TUBCGv9rTD8', 'HtfUJXIEr28', 'UQVH8WRQKFM', 'FXrb7KCUNJ4', 'OiFy2c20l3w', 'diNL6upmuv0', 'NxVEG-zRwbI', 't6Y2JhttXgU', 'LeGgqJZj2Bo', 'tmN3wYRBlS8']}, 1: {'title': 'We Make Creators', 'channelId': 'UCr6hHjMNvgQxErQ1ZJV4drw', 'publishedAt': '2018-02-11T20:07:40Z', 'viewCount': 128358844, 'subscriberCount': 1130000, 'videoCount': 1413, 'uploadsPlaylistId': 'UUr6hHjMNvgQxErQ1ZJV4drw', 'allVideoIds': ['XB_jrIAG8qQ', 'gGhbPlIf37c', 'Drzg8ZPGb2A', '_MkTNur3fxM', 'S3qGz3KcMm8', 'T8_7xaJqFgQ', 'fQjZwItB_N4', 'ci4viuQSb1c', '330ynLHYi18', 'HVPkIs8MLXg']}, 2: {'title': 'Manoj Dey', 'channelId': 'UCT2xzagsBVtStUCA2ulvKmQ', 'publishedAt': '2016-11-24T14:48:33Z', 'viewCount': 1001951068, 'subscriberCount': 7010000, 'videoCount': 1728, 'uploadsPlay

In [65]:
# function to get each video stats
def get_video_stats(videoIds):
    request = youtube.videos().list(
        part="snippet,statistics",
        id = ",".join(videoIds)
    )
    response = request.execute()
    # row = {}
    # for i in range(len(videoIds)):
    #     row[videoIds[i]] = [
    #         response["items"][i]["snippet"]["title"],
    #         response["items"][i]["statistics"]["viewCount"],
    #         response["items"][i]["statistics"]["likeCount"],
    #         response["items"][i]["statistics"]["commentCount"]
    #     ]
    # return row

    row = {}
    for i in range(len(videoIds)):
        stats = response["items"][i]["statistics"]
        row[videoIds[i]] = [
            response["items"][i]["snippet"]["title"],
            stats["viewCount"],
            stats.get("likeCount", 0),  
            stats.get("commentCount", 0) 
        ]
    return row

In [66]:
# getting each video stats like title, views, comments, like...
for v in channel_data.values():
    v["videosInfo"] = get_video_stats(v["allVideoIds"])

In [67]:
print(channel_data)



In [68]:
# performing analysis and preparing output data
output_dict = {}
for index, (channel, details) in enumerate(channel_data.items(), start=1):
    
    total_likes = 0
    total_comments = 0
    total_videos = len(details["videosInfo"])
    video_titles = []

    for video_id, stats in details["videosInfo"].items():
        title, views, likes, comments = stats
        total_likes += int(likes) 
        total_comments+= int(comments)
        video_titles.append(title)

    avg_likes = round(total_likes / total_videos if total_videos > 0 else 0)
    avg_comments = round(total_comments / total_videos if total_videos > 0 else 0)
    eng_rate = round(((avg_likes+avg_comments)*100) / details["viewCount"], 4)

    publish_date = details["publishedAt"]
    date_obj = datetime.strptime(publish_date[:10], '%Y-%m-%d')
    current_date = datetime.now()
    total_months = (current_date.year - date_obj.year) * 12 + (current_date.month - date_obj.month)
    upload_frequency = round(details["videoCount"] / total_months)

    output_dict[index] = {
        "Channel Name": details["title"],
        "Subscribers": details["subscriberCount"],
        "Total Views": details["viewCount"],
        "Average Likes": avg_likes,
        "Average Comments": avg_comments,
        "Engagement Rate": eng_rate,
        "Upload Frequency": upload_frequency,
        "Video Titles": video_titles,
    }
    
    # to show each video title in seperate column 
    # for i, title in enumerate(video_titles, start=1):
    #     output_dict[index][f"Video({i}) Title"] = title

In [69]:
# The average of likes, comments calculated within the query limit (So actual results may be vary)
# The formula used for calculating engagement rate = [(Average_Likes + Average_Comments)*100] / Total_Views_Of_Channel
# The Uploads frequency is calculated as Video Uploads Per Month

In [70]:
print(output_dict)



In [71]:
# sorting based on "Engagement Rate"
sorted_data = dict(sorted(output_dict.items(), key=lambda x: x[1]["Engagement Rate"], reverse=True))
print(sorted_data)




In [72]:
# Output as dataframe
output = pd.DataFrame.from_dict(sorted_data, orient="index")

In [75]:
output = output.reset_index(drop=True)
output.head()

Unnamed: 0,Channel Name,Subscribers,Total Views,Average Likes,Average Comments,Engagement Rate,Upload Frequency,Video Titles
0,Learn With Lokesh Lalwani,1320000,59583787,5317,631,0.01,6,"[Help business to create content using AI 🚀, E..."
1,Skillsiya,970000,69710221,2557,76,0.0038,5,[This FREE AI Tool Changed How You Learn Forev...
2,Anurag Aggarwal: Business Coach,5240000,1973396673,65593,305,0.0033,7,[Kill your loan fast | Anurag Aggarwal | In Hi...
3,Spreading Gyan,4480000,335350998,6465,1606,0.0024,5,[Youtube Channel Kaise Banaye | youtube channe...
4,CA Rahul Malodia: Business Coach,6590000,1044360611,25195,224,0.0024,16,[एक गलती कैसे Business को पड़ती है भारी ! #shor...


In [76]:
# Saving output to csv file
output.to_csv("youtube_channel_analysis_output.csv", index=False)

In [None]:
# Note: I only extracted limited videos per channel due to API limitations.
# Note: Output of some "Engagement Rate" maybe zero because we checked only limited video & most recently uploaded.

#### Github Link (Current Project): https://github.com/rohitkumar-dev/youtube-channel-analyzer
#### Google AppScript Task Sheet Link (Email Sender): https://docs.google.com/spreadsheets/d/1V4jU-pmiDllYdOf6mpvnUWDV87aj86Yzf0MzhNvgcgE/edit?usp=sharing