**Exploratory Data Analysing Using Youtube Video Data from Most YouTube Influencers in Germany**

In [1]:
from googleapiclient.discovery import build
import pandas as pd

In [2]:
#remove before upload to GitHub
api_key = 'AIzaSyBJ7mfYocgj8mEx3Ts41FmLGVHcSNbTd6s'

channel_ids = ["UCYJ61XIK64sp6ZFFS8sctxw", #Gronkh
               "UCTXeJ33DzXI2veQpKfrvaYw", #Julien Bam
               "UCcn4UOBvB0W2HjCcLFLuu2w", #ungespielt
               "UCLCb_YDL9XfSYsWpS5xrO5Q", #Rezo
               "UCpAMOlA_0hFXopIxMq8ar0w", #MontanaBlack
               "UCpZ_DI-ZugwMzXcqccaTVsg", #Dagi Bee
               "UCHfdTAyg5t4mb1G-3rJ6QsQ", #BibisBeautyPalace 
               "UCdFWqYbJHMZGbOs0efRwmsg", #xLL by Lisa & Lena
               "UChVRfsT_ASBZk10o0An7Ucg", #Pamela Reif
               "UCi3OE-aN09WOcN9d2stCvPg" ] #charli d'amelio

api_service_name = "youtube"
api_version = "v3"

# Get credentials and create an API client
youtube = build(api_service_name, api_version, developerKey=api_key)

In [3]:
def get_channel_stats(youtube, channel_ids):
    
    """
    Get channel stats
    
    Params:
    ------
    youtube: build object of Youtube API
    channel_ids: list of channel IDs
    
    Returns:
    ------
    dataframe with all channel stats for each channel ID
    
    """
    
    all_data = []
    
    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=','.join(channel_ids)
    )
    response = request.execute()

    # loop through items
    for item in response['items']:
        data = {'channelName': item['snippet']['title'],
                'subscribers': item['statistics']['subscriberCount'],
                'views': item['statistics']['viewCount'],
                'totalVideos': item['statistics']['videoCount'],
                'playlistId': item['contentDetails']['relatedPlaylists']['uploads']
        }
        
        all_data.append(data)
        
    return(pd.DataFrame(all_data))

def get_video_ids(youtube, playlist_id):
    
    video_ids = []
    
    request = youtube.playlistItems().list(
        part="snippet,contentDetails",
        playlistId=playlist_id,
        maxResults = 30
    )
    response = request.execute()
    
    for item in response['items']:
        video_ids.append(item['contentDetails']['videoId'])
        
        for item in response['items']:
            video_ids.append(item['contentDetails']['videoId'])

    return video_ids

def get_video_details(youtube, video_ids):

    all_video_info = []
    
    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics,topicDetails",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute() 

        for video in response['items']:
            stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                             'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                             'contentDetails': ['duration', 'definition', 'caption'],
                             'topicDetails': ['topicCategories']
                            }
            video_info = {}
            video_info['video_id'] = video['id']

            for k in stats_to_keep.keys():
                for v in stats_to_keep[k]:
                    try:
                        video_info[v] = video[k][v]
                    except:
                        video_info[v] = None

            all_video_info.append(video_info)
    
    return pd.DataFrame(all_video_info)

def get_comments_in_videos(youtube, video_ids):
    """
    Get top level comments as text from all videos with given IDs (only the first 10 comments due to quote limit of Youtube API)
    Params:
    
    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs
    
    Returns:
    Dataframe with video IDs and associated top level comment in text.
    
    """
    all_comments = []
    
    for video_id in video_ids:
        try:   
            request = youtube.commentThreads().list(
                part="snippet,replies",
                videoId=video_id
            )
            response = request.execute()
        
            comments_in_video = [comment['snippet']['topLevelComment']['snippet']['textOriginal'] for comment in response['items'][0:10]]
            comments_in_video_info = {'video_id': video_id, 'comments': comments_in_video}

            all_comments.append(comments_in_video_info)
            
        except: 
            # When error occurs - most likely because comments are disabled on a video
            print('Could not get comments for video ' + video_id)
        
    return pd.DataFrame(all_comments)  

In [4]:
channel_data = get_channel_stats(youtube, channel_ids)

In [5]:
channel_data

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,Dagi Bee,3960000,1061225365,643,UUpZ_DI-ZugwMzXcqccaTVsg
1,BibisBeautyPalace,5880000,2874620812,956,UUHfdTAyg5t4mb1G-3rJ6QsQ
2,rezo,1750000,191227797,49,UULCb_YDL9XfSYsWpS5xrO5Q
3,ungespielt,3760000,2109958126,4745,UUcn4UOBvB0W2HjCcLFLuu2w
4,Gronkh,4920000,3643330314,15073,UUYJ61XIK64sp6ZFFS8sctxw
5,MontanaBlack,2910000,323308052,371,UUpAMOlA_0hFXopIxMq8ar0w
6,Pamela Reif,9190000,1566402798,184,UUhVRfsT_ASBZk10o0An7Ucg
7,charli d'amelio,9480000,300921569,60,UUi3OE-aN09WOcN9d2stCvPg
8,xLL by Lisa & Lena,889000,58630761,53,UUdFWqYbJHMZGbOs0efRwmsg
9,Julien Bam,6000000,1586520224,238,UUTXeJ33DzXI2veQpKfrvaYw


In [6]:
# Create a dataframe with video statistics and comments from all channels

video_df = pd.DataFrame()
comments_df = pd.DataFrame()

for c in channel_data['channelName'].unique():
    print("Getting video information from channel: " + c)
    playlist_id = channel_data.loc[channel_data['channelName']== c, 'playlistId'].iloc[0]
    video_ids = get_video_ids(youtube, playlist_id)
    
    # get video data
    video_data = get_video_details(youtube, video_ids)
    # get comment data
    comments_data = get_comments_in_videos(youtube, video_ids)

    # append video data together and comment data toghether
    video_df = video_df.append(video_data, ignore_index=True)
    comments_df = comments_df.append(comments_data, ignore_index=True)

Getting video information from channel: Dagi Bee


HttpError: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/videos?part=snippet%2CcontentDetails%2Cstatistics%2CtopicDetails&id=3w-EyQ6P0sU%2C3w-EyQ6P0sU%2CJBavdh_I3DQ%2CDKAQorfKg_E%2C4WC1KE5vfuA%2CiYJlDKPaQMk%2Ch3zYPkxHeX4%2CsACBGH7-SgM%2CRot2f5QTQls%2CvHxwQ-rlwkM%2C7ngXXbROZME%2CSPAtaut9WJs%2CuB1lTvVNuQI%2Cx0gUmNpuPOw%2C8-LTPLsaejQ%2C1tRQF_thZxo%2C10Xano3bfxw%2ChYAlDUheKa0%2CEswrGUqGVZE%2Co6TTUoq-rjU%2CJM7MgHt-K0k%2CA93i4d07RgY%2CnEt_YEK1hqI%2CRQP-XZEgXwA%2Coz7y3PHyryA%2CIvfjqlBWcWU%2CY-wIYaHfhWU%2CjeeDgIadozM%2CqWGX9VQFjqY%2Ch_yzFmsm5u8%2CTZKpCZ-q_lY%2CJBavdh_I3DQ%2C3w-EyQ6P0sU%2CJBavdh_I3DQ%2CDKAQorfKg_E%2C4WC1KE5vfuA%2CiYJlDKPaQMk%2Ch3zYPkxHeX4%2CsACBGH7-SgM%2CRot2f5QTQls%2CvHxwQ-rlwkM%2C7ngXXbROZME%2CSPAtaut9WJs%2CuB1lTvVNuQI%2Cx0gUmNpuPOw%2C8-LTPLsaejQ%2C1tRQF_thZxo%2C10Xano3bfxw%2ChYAlDUheKa0%2CEswrGUqGVZE&key=AIzaSyBJ7mfYocgj8mEx3Ts41FmLGVHcSNbTd6s&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">

In [7]:
video_df

<class 'pandas.core.frame.DataFrame'>
Index: 0 entries
Empty DataFrame


In [31]:
comments_df

Unnamed: 0,video_id,comments
0,NFNpid428c0,[kannsu.. also... kannsu mir zum Geburtstag......
1,NFNpid428c0,[kannsu.. also... kannsu mir zum Geburtstag......
2,HY7-7DnP0f0,"[Ich hoffe, dass ihr versteht, was ich meine. ..."
3,kfj7Opc0R5E,"[Völlig sinnfrei! Nicht einen Cent wert!, Ich ..."
4,hv31vOJzpps,[Julia und ich haben uns echt mega viel Mühe g...
...,...,...
1073,H1p9k2rj-8U,[Ein LIKE auf entspannt da lassen? Der Algo da...
1074,hSREEzhCB6k,[WE'RE BACK - in 2 Wochen droppt der nächste T...
1075,Rjg9YGuKCuw,[Kurzes Update hier: https://www.youtube.com/w...
1076,9ygh5pKT9ZU,[MEINE NEUE KOLLEKTION IST DA: \nhttps://bamsh...


In [24]:
#add ChannelTitle also to comments_df