In [1]:
import json
import math
from tqdm import tqdm

from googleapiclient.discovery import build
import pandas as pd
import seaborn as sns

In [2]:
n_max_req = 50     #The maximum number of channel stat results

In [3]:
with open("../api_key.json", "r") as keyFile:
    keys = json.load(keyFile)

api_key = keys["key"]
origin_channel_id = keys["channelID"]

In [4]:
youtube = build("youtube", "v3", developerKey=api_key)

### Get Subscription of given Channel

In [5]:
def get_subscriptions(youtube, chID):
    request = youtube.subscriptions().list(
                part="snippet,contentDetails",
                channelId=chID
                )
    response = request.execute()
    n_total  = response["pageInfo"]["totalResults"]
    n_pPage  = response["pageInfo"]["resultsPerPage"]
    n_pages  = math.ceil(n_total/n_pPage)
    
    subs     = []
    pToken   = response["nextPageToken"]
    subs.extend(response["items"])
    for ii in tqdm(range(n_pages-2)):
        request = youtube.subscriptions().list(
                    part="snippet,contentDetails",
                    channelId=chID,
                    pageToken=pToken
                    )
        response = request.execute()
        pToken   = response["nextPageToken"]
        subs.extend(response["items"])
        
    request = youtube.subscriptions().list(
                part="snippet,contentDetails",
                channelId=chID,
                pageToken=pToken
                )
    response = request.execute()
    subs.extend(response["items"])
        
    return subs

In [6]:
Subscriptions = get_subscriptions(youtube, origin_channel_id)

100%|██████████████████████████████████████████████████████████████████████████████████| 25/25 [00:02<00:00,  8.83it/s]


In [7]:
channel_ids = []

for chnl in Subscriptions:
    channel_ids.append(chnl["snippet"]["resourceId"]["channelId"])

In [8]:
def get_channel_stats(youtube, chIDs):
    if len(chIDs) > n_max_req:
        response = []
        n_req = math.ceil(len(chIDs)/n_max_req)
        
        for ii in range(n_req):
            chID_string = ""
            for chID in chIDs[(ii*n_max_req):(ii*n_max_req)+n_max_req]:
                chID_string += chID + ","
            chID_string = chID_string[:-1]
            request = youtube.channels().list(
                part="snippet,contentDetails,statistics",
                id=chID_string
            )
            response.extend(request.execute()["items"])
        return response
    else:
        chID_string = ""
        for chID in chIDs:
            chID_string += chID + ","
        chID_string = chID_string[:-1]

        request = youtube.channels().list(
            part="snippet,contentDetails,statistics",
            id=chID_string
        )
        return request.execute()["items"]
    
def get_formatted_stats(youtube, channel_ids):
    stats = get_channel_stats(youtube, channel_ids)
    dict_lst = []
    
    for chnl in stats:
        print(chnl)
        data = dict(channel_name = chnl["snippet"]["title"],
                   subs = int(chnl["statistics"]["subscriberCount"]),
                   views = int(chnl["statistics"]["viewCount"]),
                   total_videos = int(chnl["statistics"]["videoCount"]))
        dict_lst.append(data)
        
    return pd.DataFrame(dict_lst)

In [9]:
channel_stats = get_formatted_stats(youtube, channel_ids)

{'kind': 'youtube#channel', 'etag': 'unve4NnXLd6oFhByLBz_WusVzqY', 'id': 'UC8Ujq8PBm0MWraaXd8MsIAQ', 'snippet': {'title': 'Composerily', 'description': 'BASED\n\n\n\n\ngeneral inquiries: composerily@gmail.com', 'customUrl': 'composerily', 'publishedAt': '2017-02-22T01:16:12Z', 'thumbnails': {'default': {'url': 'https://yt3.ggpht.com/6iBvwjWoJbSxgM2M-duIDneyxrGKRBbcmAa2cmGEAEqb-8iLQN7ulRV3vKGRhQrKmhrVebrHgAY=s88-c-k-c0x00ffffff-no-rj', 'width': 88, 'height': 88}, 'medium': {'url': 'https://yt3.ggpht.com/6iBvwjWoJbSxgM2M-duIDneyxrGKRBbcmAa2cmGEAEqb-8iLQN7ulRV3vKGRhQrKmhrVebrHgAY=s240-c-k-c0x00ffffff-no-rj', 'width': 240, 'height': 240}, 'high': {'url': 'https://yt3.ggpht.com/6iBvwjWoJbSxgM2M-duIDneyxrGKRBbcmAa2cmGEAEqb-8iLQN7ulRV3vKGRhQrKmhrVebrHgAY=s800-c-k-c0x00ffffff-no-rj', 'width': 800, 'height': 800}}, 'localized': {'title': 'Composerily', 'description': 'BASED\n\n\n\n\ngeneral inquiries: composerily@gmail.com'}, 'country': 'US'}, 'contentDetails': {'relatedPlaylists': {'likes': ''

In [None]:
channel_stats

In [None]:
def label_point(x, y, val, ax):
    a = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
    for i, point in a.iterrows():
        ax.text(point['x']+.02, point['y'], str(point['val']))

In [None]:
sns.set(rc={'figure.figsize':(12,8)})
ax = sns.scatterplot(data=channel_stats, x="views", y="subs", size="total_videos", legend=False, sizes=(20,2000))
label_point(channel_stats.views, channel_stats.subs, channel_stats.channel_name, ax)

In [None]:
filtered_channels = channel_stats
filtered_channels = filtered_channels.drop(filtered_channels[filtered_channels.views>1e6].index)

In [None]:
sns.set(rc={'figure.figsize':(12,8)})
ax = sns.scatterplot(data=filtered_channels, x="views", y="subs", size="total_videos", legend=False, sizes=(20,2000))
label_point(filtered_channels.views, filtered_channels.subs, filtered_channels.channel_name, ax)