**Exploratory Data Analysing Using Youtube Video Data from Most YouTube Influencers in Germany**

In [32]:
from googleapiclient.discovery import build
import pandas as pd

In [33]:
#remove before upload to GitHub
api_key = 'AIzaSyBJ7mfYocgj8mEx3Ts41FmLGVHcSNbTd6s'

channel_ids = ["UCYJ61XIK64sp6ZFFS8sctxw", #Gronkh
               "UCTXeJ33DzXI2veQpKfrvaYw", #Julien Bam
               "UCcn4UOBvB0W2HjCcLFLuu2w", #ungespielt
               "UCLCb_YDL9XfSYsWpS5xrO5Q", #Rezo
               "UCpAMOlA_0hFXopIxMq8ar0w", #MontanaBlack
               "UCpZ_DI-ZugwMzXcqccaTVsg", #Dagi Bee
               "UCHfdTAyg5t4mb1G-3rJ6QsQ", #BibisBeautyPalace 
               "UCdFWqYbJHMZGbOs0efRwmsg", #xLL by Lisa & Lena
               "UChVRfsT_ASBZk10o0An7Ucg", #Pamela Reif
               "UCi3OE-aN09WOcN9d2stCvPg" ] #charli d'amelio

api_service_name = "youtube"
api_version = "v3"

# Get credentials and create an API client
youtube = build(api_service_name, api_version, developerKey=api_key)

In [34]:
def get_channel_stats(youtube, channel_ids):
    
    """
    Get channel stats
    
    Params:
    ------
    youtube: build object of Youtube API
    channel_ids: list of channel IDs
    
    Returns:
    ------
    dataframe with all channel stats for each channel ID
    
    """
    
    all_data = []
    
    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=','.join(channel_ids)
    )
    response = request.execute()

    # loop through items
    for item in response['items']:
        data = {'channelName': item['snippet']['title'],
                'subscribers': item['statistics']['subscriberCount'],
                'views': item['statistics']['viewCount'],
                'totalVideos': item['statistics']['videoCount'],
                'playlistId': item['contentDetails']['relatedPlaylists']['uploads']
        }
        
        all_data.append(data)
        
    return(pd.DataFrame(all_data))

def get_video_ids(youtube, playlist_id):
    
    video_ids = []
    
    request = youtube.playlistItems().list(
        part="snippet,contentDetails",
        playlistId=playlist_id,
        maxResults = 30
    )
    response = request.execute()
    
    for item in response['items']:
        video_ids.append(item['contentDetails']['videoId'])
        
        for item in response['items']:
            video_ids.append(item['contentDetails']['videoId'])

    return video_ids

def get_video_details(youtube, video_ids):

    all_video_info = []
    
    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics,topicDetails",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute() 

        for video in response['items']:
            stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                             'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                             'contentDetails': ['duration', 'definition', 'caption'],
                             'topicDetails': ['topicCategories']
                            }
            video_info = {}
            video_info['video_id'] = video['id']

            for k in stats_to_keep.keys():
                for v in stats_to_keep[k]:
                    try:
                        video_info[v] = video[k][v]
                    except:
                        video_info[v] = None

            all_video_info.append(video_info)
    
    return pd.DataFrame(all_video_info)

def get_comments_in_videos(youtube, video_ids):
    """
    Get top level comments as text from all videos with given IDs (only the first 10 comments due to quote limit of Youtube API)
    Params:
    
    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs
    
    Returns:
    Dataframe with video IDs and associated top level comment in text.
    
    """
    all_comments = []
    
    for video_id in video_ids:
        try:   
            request = youtube.commentThreads().list(
                part="snippet,replies",
                videoId=video_id
            )
            response = request.execute()
        
            comments_in_video = [comment['snippet']['topLevelComment']['snippet']['textOriginal'] for comment in response['items'][0:10]]
            comments_in_video_info = {'video_id': video_id, 'comments': comments_in_video}

            all_comments.append(comments_in_video_info)
            
        except: 
            # When error occurs - most likely because comments are disabled on a video
            print('Could not get comments for video ' + video_id)
        
    return pd.DataFrame(all_comments)  

In [35]:
channel_data = get_channel_stats(youtube, channel_ids)

In [36]:
channel_data

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,Julien Bam,6000000,1586520224,238,UUTXeJ33DzXI2veQpKfrvaYw
1,Pamela Reif,9190000,1566402798,184,UUhVRfsT_ASBZk10o0An7Ucg
2,Gronkh,4920000,3643330314,15073,UUYJ61XIK64sp6ZFFS8sctxw
3,MontanaBlack,2910000,323308052,371,UUpAMOlA_0hFXopIxMq8ar0w
4,rezo,1750000,191227797,49,UULCb_YDL9XfSYsWpS5xrO5Q
5,BibisBeautyPalace,5880000,2874620812,956,UUHfdTAyg5t4mb1G-3rJ6QsQ
6,ungespielt,3760000,2109958126,4745,UUcn4UOBvB0W2HjCcLFLuu2w
7,xLL by Lisa & Lena,889000,58630761,53,UUdFWqYbJHMZGbOs0efRwmsg
8,Dagi Bee,3960000,1061225365,643,UUpZ_DI-ZugwMzXcqccaTVsg
9,charli d'amelio,9480000,300921569,60,UUi3OE-aN09WOcN9d2stCvPg


In [37]:
# Create a dataframe with video statistics and comments from all channels

video_df = pd.DataFrame()
comments_df = pd.DataFrame()

for c in channel_data['channelName'].unique():
    print("Getting video information from channel: " + c)
    playlist_id = channel_data.loc[channel_data['channelName']== c, 'playlistId'].iloc[0]
    video_ids = get_video_ids(youtube, playlist_id)
    
    # get video data
    video_data = get_video_details(youtube, video_ids)
    # get comment data
    comments_data = get_comments_in_videos(youtube, video_ids)

    # append video data together and comment data toghether
    video_df = video_df.append(video_data, ignore_index=True)
    comments_df = comments_df.append(comments_data, ignore_index=True)

Getting video information from channel: Julien Bam


  video_df = video_df.append(video_data, ignore_index=True)
  comments_df = comments_df.append(comments_data, ignore_index=True)


Getting video information from channel: Pamela Reif
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Could not get comments for video l4IKrZymsXU
Cou

  video_df = video_df.append(video_data, ignore_index=True)
  comments_df = comments_df.append(comments_data, ignore_index=True)


Getting video information from channel: Gronkh


  video_df = video_df.append(video_data, ignore_index=True)
  comments_df = comments_df.append(comments_data, ignore_index=True)


Getting video information from channel: MontanaBlack
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Could not get comments for video Z2m2SMdEmzY
Co

  video_df = video_df.append(video_data, ignore_index=True)
  comments_df = comments_df.append(comments_data, ignore_index=True)


Getting video information from channel: rezo
Could not get comments for video _XQ2Px8dcXk


  video_df = video_df.append(video_data, ignore_index=True)
  comments_df = comments_df.append(comments_data, ignore_index=True)


Getting video information from channel: BibisBeautyPalace


  video_df = video_df.append(video_data, ignore_index=True)
  comments_df = comments_df.append(comments_data, ignore_index=True)


Getting video information from channel: ungespielt
Could not get comments for video brLBG5W0goM
Could not get comments for video h8CNQy9HNkI
Could not get comments for video IkkzVAkKM-4
Could not get comments for video IT2goTo2CGw
Could not get comments for video NKoju5auAzs
Could not get comments for video 695-R-0pBrU
Could not get comments for video RRvh0dcLfkc
Could not get comments for video 6ggZe1zTBqg
Could not get comments for video JYfhKBeUBSw
Could not get comments for video CE5VU5bIJXs
Could not get comments for video d81XZBCHq_k
Could not get comments for video vxA0BmcJfls
Could not get comments for video Nlpxg7FN7j4
Could not get comments for video brLBG5W0goM
Could not get comments for video 7qwYyhX9mTE
Could not get comments for video 7qwYyhX9mTE
Could not get comments for video 8GQImB1cQ5c
Could not get comments for video rAjKJKP-JrA
Could not get comments for video vAZ2JdOB3fg
Could not get comments for video NKoju5auAzs
Could not get comments for video wwylektNWWg
Coul

  video_df = video_df.append(video_data, ignore_index=True)
  comments_df = comments_df.append(comments_data, ignore_index=True)


HttpError: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/playlistItems?part=snippet%2CcontentDetails&playlistId=UUdFWqYbJHMZGbOs0efRwmsg&maxResults=30&key=AIzaSyBJ7mfYocgj8mEx3Ts41FmLGVHcSNbTd6s&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">

In [30]:
video_df

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption,topicCategories
0,NFNpid428c0,rezo,Katja und ich REMIXEN ihren neuen Song GUCCI G...,"Katja und ich remixen heute ihren Song ""GUCCI ...","[Katja rezo, katja k gucci girl, katja k, Katj...",2019-07-27T16:00:02Z,4055317,234556,,15469,PT15M12S,hd,false,"[https://en.wikipedia.org/wiki/Hip_hop_music, ..."
1,NFNpid428c0,rezo,Katja und ich REMIXEN ihren neuen Song GUCCI G...,"Katja und ich remixen heute ihren Song ""GUCCI ...","[Katja rezo, katja k gucci girl, katja k, Katj...",2019-07-27T16:00:02Z,4055317,234556,,15469,PT15M12S,hd,false,"[https://en.wikipedia.org/wiki/Hip_hop_music, ..."
2,HY7-7DnP0f0,rezo,Was aktuell los ist...,MEIN SHOP: http://www.rezo.de\n\n► FOLGT MIR\n...,"[Rezo, rezo ansage, rezo statement]",2019-01-27T10:30:00Z,1590777,119480,,6718,PT8M8S,hd,false,
3,kfj7Opc0R5E,rezo,TANZVERBOT macht die ROBBE 😂 Youtuber REMIXEN ...,"Tanzverbot ""Mein Leben"" gemixt mit ""Mach die R...","[julien bam, mach die robbe, tanzverbot rap, t...",2018-12-22T11:00:08Z,5442435,268716,,6921,PT12M33S,hd,false,"[https://en.wikipedia.org/wiki/Entertainment, ..."
4,hv31vOJzpps,rezo,Wenn SCHULFÄCHER Rapper wären | mit Julia Beautx,Julia und ich zeigen euch ein Rap Battle zwisc...,"[schule rap, schule song, bio rap, biologie ra...",2018-11-18T11:01:42Z,21738759,637878,,38294,PT6M19S,hd,false,"[https://en.wikipedia.org/wiki/Hip_hop_music, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1095,H1p9k2rj-8U,Julien Bam,Das letzte MÄRCHEN IN ASOZIAL (Part 2) | Julie...,Wir sind zurück mit Part 2 vom letzten Märchen...,"[Julien Bam, das letzte Märchen in asozial, mä...",2021-12-11T15:00:10Z,3658345,313859,,8465,PT13M9S,hd,false,[https://en.wikipedia.org/wiki/Entertainment]
1096,hSREEzhCB6k,Julien Bam,Das letzte MÄRCHEN IN ASOZIAL (Part 1) I Julie...,Nach 707 Tagen sind wir zurück mit Part 1 vom ...,"[Julien Bam, das letzte märchen in asozial, mä...",2021-11-27T15:00:11Z,4529769,430730,,15213,PT11M35S,hd,false,[https://en.wikipedia.org/wiki/Entertainment]
1097,Rjg9YGuKCuw,Julien Bam,Meine letzten Worte auf diesem Kanal I Julien Bam,NEW: Kurzes Statement hier: https://www.youtub...,"[Meine letzten Worte auf diesem Kanal, Julien ...",2019-12-21T11:00:04Z,7375236,618900,,65169,PT13M22S,hd,false,[https://en.wikipedia.org/wiki/Music]
1098,9ygh5pKT9ZU,Julien Bam,Wie isch dein MUTTER getroffen hab (SITCOM) | ...,Unsere eigene Sitcom WIE ISCH DEIN MUTTER GETR...,"[Wie isch dein Mutter getroffen hab, Wie ich d...",2019-12-07T11:00:02Z,4349873,242223,,7622,PT13M30S,hd,false,"[https://en.wikipedia.org/wiki/Entertainment, ..."


In [31]:
comments_df

Unnamed: 0,video_id,comments
0,NFNpid428c0,[kannsu.. also... kannsu mir zum Geburtstag......
1,NFNpid428c0,[kannsu.. also... kannsu mir zum Geburtstag......
2,HY7-7DnP0f0,"[Ich hoffe, dass ihr versteht, was ich meine. ..."
3,kfj7Opc0R5E,"[Völlig sinnfrei! Nicht einen Cent wert!, Ich ..."
4,hv31vOJzpps,[Julia und ich haben uns echt mega viel Mühe g...
...,...,...
1073,H1p9k2rj-8U,[Ein LIKE auf entspannt da lassen? Der Algo da...
1074,hSREEzhCB6k,[WE'RE BACK - in 2 Wochen droppt der nächste T...
1075,Rjg9YGuKCuw,[Kurzes Update hier: https://www.youtube.com/w...
1076,9ygh5pKT9ZU,[MEINE NEUE KOLLEKTION IST DA: \nhttps://bamsh...


In [24]:
#add ChannelTitle also to comments_df