# Channel Statistics

In [1]:
import os
from pprint import pprint

import altair as alt
import matplotlib.pyplot as plt
import pandas as pd
from googleapiclient.discovery import build

# Load the list of channels to look up. Forward slashes resolve on Windows,
# macOS and Linux alike, unlike a backslash-separated path.
df = pd.read_excel('data/channel_ids.xlsx')
ids = df["Channel ID"]

# The API key is read from the environment so it never lands in the notebook
# or in version control.
# NOTE(review): the key that used to be hardcoded here should be treated as
# leaked and rotated.
youtube = build('youtube', 'v3', developerKey=os.environ["YOUTUBE_API_KEY"])

channelnames = []
subcount = []
videocount = []
viewcount = []

# Walk names and ids together: when the API returns nothing for a channel, it
# is dropped from every column at once, so the four lists always stay aligned.
for channelname, channelid in zip(df['Channel Name'], ids):
    request = youtube.channels().list(part='snippet,contentDetails,statistics', id=channelid)
    response = request.execute()

    # .get() tolerates a response without an 'items' key as well as an empty list.
    items = response.get('items')
    if not items:
        continue

    statistics = items[0]['statistics']
    channelnames.append(channelname)
    subcount.append(int(statistics['subscriberCount']))
    videocount.append(int(statistics['videoCount']))
    viewcount.append(int(statistics['viewCount']))

# One row per channel that returned statistics, largest audience first.
stats = pd.DataFrame({
    'Channel Name': channelnames,
    'Subscriber Count': subcount,
    'View Count': viewcount,
    'Video Count': videocount,
}).sort_values(by=['Subscriber Count'], ascending=False)
stats

Unnamed: 0,Channel Name,Subscriber Count,View Count,Video Count
0,3Blue1Brown,5020000,326157063,128
13,Joma Tech,2150000,173877240,113
9,The Coding Train,1570000,113401685,1214
11,Sentdex,1220000,108532152,1237
14,Tech With Tim,1180000,106676591,819
10,Corey Shafer,1080000,83563990,231
1,StatQuest with Josh Starmer,894000,46541348,245
2,Krish Naik,730000,71844761,1642
5,Tina Huang,480000,20161022,119
6,Alex The Analyst,411000,15990067,198


In [3]:
# Bar chart of subscriber counts per channel, with 'Joma Tech' highlighted in blue.
# configure_axisX() returns a new chart rather than modifying the one it is
# called on, so it is chained here to make the tick setting reach the chart
# that is displayed.
chart = alt.Chart(stats, title='Subscriber Counts by Channel Name').mark_bar().encode(
    x=alt.X('Channel Name', sort='y', title=None),
    y=alt.Y('Subscriber Count'),
    color=alt.condition(alt.datum['Channel Name'] == 'Joma Tech', alt.value('blue'), alt.value('black')),
    tooltip=['Subscriber Count', 'Channel Name']
).configure_axisX(ticks=False)
chart


In [4]:
# Bar chart of view counts per channel, with 'Joma Tech' highlighted in blue.
# configure_axisX() returns a new chart rather than modifying the one it is
# called on, so it is chained here to make the tick setting reach the chart
# that is displayed.
chart = alt.Chart(stats, title='View Counts by Channel Name').mark_bar().encode(
    x=alt.X('Channel Name', sort='y', title=None),
    y=alt.Y('View Count'),
    color=alt.condition(alt.datum['Channel Name'] == 'Joma Tech', alt.value('blue'), alt.value('black')),
    tooltip=['View Count', 'Channel Name']
).configure_axisX(ticks=False)
chart


In [5]:
# Bar chart of video counts per channel, with 'Joma Tech' highlighted in blue.
# configure_axisX() returns a new chart rather than modifying the one it is
# called on, so it is chained here to make the tick setting reach the chart
# that is displayed.
chart = alt.Chart(stats, title='Video Counts by Channel Name').mark_bar().encode(
    x=alt.X('Channel Name', sort='y', title=None),
    y=alt.Y('Video Count'),
    color=alt.condition(alt.datum['Channel Name'] == 'Joma Tech', alt.value('blue'), alt.value('black')),
    tooltip=['Video Count', 'Channel Name']
).configure_axisX(ticks=False)
chart


# Survey Data Analysis

In [8]:
def map_values(value):
    """Translate a difficulty label into its numeric level.

    'Easy' becomes 1, 'Medium' becomes 2 and 'Hard' becomes 3. Any other
    value is handed back unchanged.
    """
    levels = (('Easy', 1), ('Medium', 2), ('Hard', 3))
    for label, level in levels:
        if value == label:
            return level
    return value


# Load the survey answers. Forward slashes resolve on Windows, macOS and
# Linux alike, unlike a backslash-separated path.
responses = pd.read_excel('data/responses.xlsx')
responses.head()

Unnamed: 0,DATA 530,DATA 580,DATA 541,DATA 531,DATA 532,DATA 540,DATA 553,DATA 570,DATA 533,DATA 542,DATA 543,DATA 571,DATA 534,DATA 550,DATA 572,DATA 581,DATA 551,DATA 552,DATA 573,DATA 583
0,Easy,Hard,Easy,Medium,Easy,Easy,Medium,Easy,Easy,Easy,Medium,Medium,Medium,Medium,Easy,Hard,Medium,Easy,Hard,Hard
1,Medium,Medium,Medium,Medium,Medium,Easy,Medium,Medium,Hard,Medium,Medium,Hard,Hard,Medium,Hard,Hard,Hard,Medium,Hard,Hard
2,Easy,Medium,Medium,Medium,Hard,Easy,Easy,Medium,Easy,Medium,Easy,Medium,Hard,Easy,Medium,Hard,Medium,Medium,Hard,Hard
3,Easy,Medium,Medium,Easy,Medium,Easy,Hard,Medium,Easy,Medium,Medium,Medium,Medium,Hard,Medium,Hard,Medium,Medium,Medium,Hard
4,Easy,Medium,Easy,Hard,Medium,Easy,Easy,Medium,Easy,Medium,Hard,Easy,Medium,Medium,Easy,Medium,Easy,Easy,Medium,Medium


In [9]:
# Convert every answer to its numeric level, one column at a time.
# Series.map is used instead of DataFrame.applymap because applymap is
# deprecated in recent pandas releases, while Series.map is available in
# old and new versions alike.
data = responses.apply(lambda column: column.map(map_values))
data.head()

Unnamed: 0,DATA 530,DATA 580,DATA 541,DATA 531,DATA 532,DATA 540,DATA 553,DATA 570,DATA 533,DATA 542,DATA 543,DATA 571,DATA 534,DATA 550,DATA 572,DATA 581,DATA 551,DATA 552,DATA 573,DATA 583
0,1,3,1,2,1,1,2,1,1,1,2,2,2,2,1,3,2,1,3,3
1,2,2,2,2,2,1,2,2,3,2,2,3,3,2,3,3,3,2,3,3
2,1,2,2,2,3,1,1,2,1,2,1,2,3,1,2,3,2,2,3,3
3,1,2,2,1,2,1,3,2,1,2,2,2,2,3,2,3,2,2,2,3
4,1,2,1,3,2,1,1,2,1,2,3,1,2,2,1,2,1,1,2,2


In [10]:
# Per-course tallies of how many responses equal each difficulty level
# (1, 2 and 3). The Series of column sums is iterated directly: going through
# a dict first would merge entries if two columns ever shared a label and
# leave the counts shorter than the list of courses.
courses = list(data.columns)
one, two, three = (list(data.eq(level).sum()) for level in (1, 2, 3))

mydata = pd.DataFrame({'courses': courses, 'ones': one, 'twos': two, 'threes': three})
mydata.head()

Unnamed: 0,courses,ones,twos,threes
0,DATA 530,15,3,1
1,DATA 580,3,9,7
2,DATA 541,9,10,0
3,DATA 531,9,7,3
4,DATA 532,5,11,3


In [11]:
# Highest "ones" count as a plain Python int. The value is embedded in the
# chart's colour condition, which should receive a bare numeric literal
# rather than a numpy scalar.
most_ones = int(mydata['ones'].max())

# Count of level-1 ("Easy") responses per course; the bar(s) with the highest
# count are drawn in green, all others in black.
onechart = alt.Chart(mydata, title=alt.TitleParams(text=["Level of difficulty: Easy"])).mark_bar().encode(
    x=alt.X('courses', sort='y', title=None, axis=alt.Axis(ticks=False)),
    y=alt.Y('ones', title="Count of responses"),
    color=alt.condition(alt.datum.ones == most_ones, alt.value('green'), alt.value('black')),
)
onechart

In [12]:
# Highest "twos" count as a plain Python int. The value is embedded in the
# chart's colour condition, which should receive a bare numeric literal
# rather than a numpy scalar.
most_twos = int(mydata['twos'].max())

# Count of level-2 ("Medium") responses per course; the bar(s) with the highest
# count are drawn in yellow (#FAD02C), all others in black.
twochart = alt.Chart(mydata, title=alt.TitleParams(text=["Level of difficulty: Medium"])).mark_bar().encode(
    x=alt.X('courses', sort='y', title=None, axis=alt.Axis(ticks=False)),
    y=alt.Y('twos', title="Count of responses"),
    color=alt.condition(alt.datum.twos == most_twos, alt.value('#FAD02C'), alt.value('black')),
)
twochart

In [13]:
# Highest "threes" count as a plain Python int. The value is embedded in the
# chart's colour condition, which should receive a bare numeric literal
# rather than a numpy scalar.
most_threes = int(mydata['threes'].max())

# Count of level-3 ("Hard") responses per course; the bar(s) with the highest
# count are drawn in red, all others in black.
threechart = alt.Chart(mydata, title=alt.TitleParams(text=["Level of difficulty: Hard"])).mark_bar().encode(
    x=alt.X('courses', sort='y', title=None, axis=alt.Axis(ticks=False)),
    y=alt.Y('threes', title="Count of responses"),
    color=alt.condition(alt.datum.threes == most_threes, alt.value('red'), alt.value('black')),
)
threechart

In [14]:
# Rank the courses by their 'threes' count, largest first.
mydata.sort_values('threes', ascending=False)

Unnamed: 0,courses,ones,twos,threes
19,DATA 583,1,3,15
15,DATA 581,2,3,14
1,DATA 580,3,9,7
6,DATA 553,7,5,7
12,DATA 534,3,10,6
18,DATA 573,2,12,5
13,DATA 550,3,11,5
16,DATA 551,2,13,4
11,DATA 571,5,10,4
14,DATA 572,5,11,3


In [15]:
# Rank the courses by their 'twos' count, largest first.
mydata.sort_values('twos', ascending=False)

Unnamed: 0,courses,ones,twos,threes
16,DATA 551,2,13,4
18,DATA 573,2,12,5
4,DATA 532,5,11,3
14,DATA 572,5,11,3
13,DATA 550,3,11,5
7,DATA 570,5,11,3
11,DATA 571,5,10,4
2,DATA 541,9,10,0
12,DATA 534,3,10,6
9,DATA 542,7,10,2
