In [1]:
# Installing the relevant packages
# !pip install google-api-python-client

In [2]:
# Importing the relevant packages
import os
import googleapiclient.discovery
import numpy as np
import pandas as pd
# import emoji
from datetime import datetime

In [3]:
# Setting configuration for Search API Query
config = {"part":"snippet",
          "channelId":"UCFFDDhKEm6nyFWV8WncAOkw",
          "maxResults":500,
          "order":"date",
          "publishedAfter":"2017-11-01T00:00:00Z",
          "type":"video"}

# Empty list for the results of the Youtube Search API Query
all_responses = []

# Function for the Youtube Search API Query
def main():
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"
    DEVELOPER_KEY = "AIzaSyAm05u8SWGevg6CfVJWXfPw_lST1HlqI8g"

    # Get credentials and create an API client
    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, developerKey = DEVELOPER_KEY)

    request = youtube.search().list(**config)
    response = request.execute()
    
    while response:
        all_responses.append(response)
        if 'nextPageToken' in response:
            config['pageToken'] = response['nextPageToken']
            response = youtube.search().list(**config).execute()
        else:
            break    

if __name__ == "__main__":
     main()

In [4]:
all_responses

[{'kind': 'youtube#searchListResponse',
  'etag': 'dWr5cl5yIADV_UllbJ_wd4dLTrU',
  'nextPageToken': 'CDIQAA',
  'regionCode': 'GB',
  'pageInfo': {'totalResults': 215, 'resultsPerPage': 50},
  'items': [{'kind': 'youtube#searchResult',
    'etag': '0yywqsY1MbtPGZqoPlOEvJjbc6U',
    'id': {'kind': 'youtube#video', 'videoId': 'QWeoUS1_Ufw'},
    'snippet': {'publishedAt': '2021-07-09T18:56:29Z',
     'channelId': 'UCFFDDhKEm6nyFWV8WncAOkw',
     'title': 'MENSWEAR FOR LESS THAN £70 AT SELFRIDFES | REISS, PREVU, HUGO, NO91',
     'description': 'SELFRIDGES SALE IS LIVE - https://tidd.ly/2SYlD9o In this video I look at some low key brands for less than a £100 budget to try for those who are looking to ...',
     'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/QWeoUS1_Ufw/default.jpg',
       'width': 120,
       'height': 90},
      'medium': {'url': 'https://i.ytimg.com/vi/QWeoUS1_Ufw/mqdefault.jpg',
       'width': 320,
       'height': 180},
      'high': {'url': 'https://i.yt

In [5]:
# Empty Lists which will form the Columns of the Video Description Dataframe
upload_date = []
video_id = []
video_title = []
video_description = []

# Looping through the JSON response to append values to empty column lists
for level in range(len(all_responses)):
    for sub_level in range(len(all_responses[level]['items'])):
        upload_date.append(all_responses[level]['items'][sub_level]['snippet']['publishedAt'])
        video_id.append(all_responses[level]['items'][sub_level]['id']['videoId'])
        video_title.append(all_responses[level]['items'][sub_level]['snippet']['title'])
        video_description.append(all_responses[level]['items'][sub_level]['snippet']['description'])

# Converting date for consistency across files
upload_date = [datetime.strptime(upload_date[x], "%Y-%m-%dT%H:%M:%SZ") for x in range(len(upload_date))]
upload_date = [x.strftime('%Y/%m/%d') for x in upload_date]

# # Removing Emojis to prevent encoding issues
# video_title = [emoji.demojize(x, delimiters=("#*", "#*")) for x in video_title]
# video_description = [emoji.demojize(x, delimiters=("#*", "#*")) for x in video_description]
        
# Creating the Video Description Dataframe       
video_description_df = pd.DataFrame(list(zip(upload_date, video_id, video_title, video_description)),
                             columns =['date', 'video_id','video_title', 'video_description'])

video_description_df.head()

Unnamed: 0,date,video_id,video_title,video_description
0,2021/07/09,QWeoUS1_Ufw,MENSWEAR FOR LESS THAN £70 AT SELFRIDFES | REI...,SELFRIDGES SALE IS LIVE - https://tidd.ly/2SYl...
1,2021/07/06,XrzJwUzKh0U,I PASSED THROUGH DESIGNER POP UP TO SEE WHAT&#...,Visit DPUS New Website and get an EXTRA 15% of...
2,2021/06/22,3Cg8-eoNXdY,SLIM FIT DENIM JEANS LATEST PICK UPS + COLLECT...,This video has been sponsored by @FarFetch GET...
3,2021/06/13,zgXoCGUcR9w,"☀️☀️SUMMER OUTFIT PICKUPS UNBOXING | TMONCLER,...",THIS VIDEO HAS BEEN SPONSORED BY @SEVEN STORE ...
4,2021/06/09,AFpME2E2GAY,MONTHLY DESIGNER SELECTIONS UNBOXING &amp; TRY...,THIS VIDEO HAS BEEN SPONSORED BY LUISAVIAROMA ...


In [6]:
# Defining function to extract information from the comment
def extract_infos_from_comment(comment,fields=["textOriginal"]):
    snippet = comment.get("snippet")
    if(snippet):
        return( {key:snippet.get(key) for key in fields})
    else:
        return(None)

# Defining function to get comment from raw result
def get_comment_from_raw_result(result,fields=["textOriginal"]):
    main_comment = None
    replies = []
    snippet = result.get("snippet")
    if(snippet):
        top_level_comment = snippet.get("topLevelComment")
        if(top_level_comment):
            main_comment = extract_infos_from_comment(top_level_comment,fields=fields)
        else:
            pass
    else:
        pass
    list_replies = result.get("replies")
    if(list_replies):
        comments = list_replies.get("comments")
        if(comments):
            for comment in comments:
                replies.append(extract_infos_from_comment(comment,fields=fields))
    dic = {"Main_comment":main_comment,"replies":replies}            
    return(dic)

# Defining a function to get all comments from results
def get_all_comments_from_results(results,fields=["textOriginal"]):
    items = results.get("items")
    all_comments = [get_comment_from_raw_result(item,fields=fields) for item in items]
    return(all_comments)

# Defining a function to get all comments
def get_all_comments(config_request,fields=["textOriginal"],verbose=False):
    all_comments = []
    
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"
    DEVELOPER_KEY = "AIzaSyAm05u8SWGevg6CfVJWXfPw_lST1HlqI8g"

    # Get credentials and create an API client
    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, developerKey = DEVELOPER_KEY)
    
    results = youtube.commentThreads().list(**config_request).execute()
    current_page = 0
    n_total_comments = 0
    while results:
        current_page += 1
        if( verbose):
            print("parsing comments for page {}..".format(current_page))
        comments_this_page = get_all_comments_from_results(results,fields=fields)
        n_comments = len(comments_this_page)
        n_total_comments += n_comments
        if(verbose):
            print("Found {} comments on this page..".format(n_comments))
        all_comments.append(comments_this_page)
        if 'nextPageToken' in results:
                    config_request['pageToken'] = results['nextPageToken']
                    results = youtube.commentThreads().list(**config_request).execute()
        else:
            break
    dic = {"all_comments":all_comments,"n_comments":n_total_comments}
    return(dic)

In [7]:
# Empty Lists which will form the Columns of the Video Comments Dataframe
comments_temp = []
comment_time_temp = []
video_id_temp = []
comment_author_temp = []
like_count_temp = []
reply_flag_temp = []

# Looping through all videos in the Video Description Dataframe
for video in video_id:
    
    # Pulling comments from a selected video
    config_request = {"part":"id,snippet,replies",
                      "order":"time",
                      "videoId": video}

    fields = ["textOriginal","publishedAt",'videoId','authorDisplayName','likeCount']

    all_comments = get_all_comments(config_request,fields=fields,verbose=True)

    # Looping through the JSON response to append values to empty column lists
    for full_list in all_comments["all_comments"]:
        for element in full_list:
            for j in element:
                if j == "Main_comment":
                    comments_temp.append(element["Main_comment"]["textOriginal"])
                    comment_time_temp.append(element["Main_comment"]["publishedAt"])
                    video_id_temp.append(element["Main_comment"]["videoId"])
                    comment_author_temp.append(element["Main_comment"]["authorDisplayName"])
                    like_count_temp.append(element["Main_comment"]["likeCount"])
                    reply_flag_temp.append(0)
                elif j  == "replies":
                    for reply in element["replies"]:
                        comments_temp.append(reply["textOriginal"])
                        comment_time_temp.append(reply["publishedAt"])
                        video_id_temp.append(reply["videoId"])
                        comment_author_temp.append(reply["authorDisplayName"])
                        like_count_temp.append(reply["likeCount"])
                        reply_flag_temp.append(1)

parsing comments for page 1..
Found 10 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 8 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 4 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 5 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 3 comments on this page..
parsing comments for page 1..
Found 12 comments on this page..
parsing comments for page 1..
Found 18 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 5 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 3 comments on this page.

parsing comments for page 4..
Found 20 comments on this page..
parsing comments for page 5..
Found 20 comments on this page..
parsing comments for page 6..
Found 8 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 20 comments on this page..
parsing comments for page 3..
Found 20 comments on this page..
parsing comments for page 4..
Found 20 comments on this page..
parsing comments for page 5..
Found 13 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 20 comments on this page..
parsing comments for page 3..
Found 20 comments on this page..
parsing comments for page 4..
Found 20 comments on this page..
parsing comments for page 5..
Found 20 comments on this page..
parsing comments for page 6..
Found 5 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 20 comments on this p

parsing comments for page 2..
Found 20 comments on this page..
parsing comments for page 3..
Found 20 comments on this page..
parsing comments for page 4..
Found 20 comments on this page..
parsing comments for page 5..
Found 3 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 20 comments on this page..
parsing comments for page 3..
Found 20 comments on this page..
parsing comments for page 4..
Found 20 comments on this page..
parsing comments for page 5..
Found 20 comments on this page..
parsing comments for page 6..
Found 20 comments on this page..
parsing comments for page 7..
Found 20 comments on this page..
parsing comments for page 8..
Found 20 comments on this page..
parsing comments for page 9..
Found 20 comments on this page..
parsing comments for page 10..
Found 20 comments on this page..
parsing comments for page 11..
Found 20 comments on this page..
parsing comments for page 12..
Found 8 comments on thi

parsing comments for page 3..
Found 20 comments on this page..
parsing comments for page 4..
Found 20 comments on this page..
parsing comments for page 5..
Found 20 comments on this page..
parsing comments for page 6..
Found 20 comments on this page..
parsing comments for page 7..
Found 20 comments on this page..
parsing comments for page 8..
Found 20 comments on this page..
parsing comments for page 9..
Found 20 comments on this page..
parsing comments for page 10..
Found 15 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 20 comments on this page..
parsing comments for page 3..
Found 20 comments on this page..
parsing comments for page 4..
Found 14 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 20 comments on this page..
parsing comments for page 3..
Found 20 comments on this page..
parsing comments for page 4..
Found 6 comments on this

parsing comments for page 2..
Found 20 comments on this page..
parsing comments for page 3..
Found 3 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 3 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 20 comments on this page..
parsing comments for page 3..
Found 20 comments on this page..
parsing comments for page 4..
Found 20 comments on this page..
parsing comments for page 5..
Found 20 comments on this page..
parsing comments for page 6..
Found 20 comments on this page..
parsing comments for page 7..
Found 20 comments on this page..
parsing comments for page 8..
Found 20 comments on this page..
parsing comments for page 9..
Found 1 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 20 comments on this page..
parsing comments for page 3..
Found 4 comments on this pag

parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 20 comments on this page..
parsing comments for page 3..
Found 20 comments on this page..
parsing comments for page 4..
Found 20 comments on this page..
parsing comments for page 5..
Found 10 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 20 comments on this page..
parsing comments for page 3..
Found 1 comments on this page..
parsing comments for page 1..
Found 20 comments on this page..
parsing comments for page 2..
Found 20 comments on this page..
parsing comments for page 3..
Found 20 comments on this page..
parsing comments for page 4..
Found 20 comments on this page..
parsing comments for page 5..
Found 20 comments on this page..
parsing comments for page 6..
Found 20 comments on this page..
parsing comments for page 7..
Found 20 comments on this page..
parsing comments for page 8..
Found 11 comments on this 

In [8]:
# # Removing Emojis to prevent encoding issues
# comments_temp = [emoji.demojize(x, delimiters=("#*", "#*")) for x in comments_temp]
# comment_author_temp = [emoji.demojize(x, delimiters=("#*", "#*")) for x in comment_author_temp] 

# Converting date for consistency across files
comment_time_temp = [datetime.strptime(comment_time_temp[x], "%Y-%m-%dT%H:%M:%SZ") for x in range(len(comment_time_temp))]
comment_time_temp = [x.strftime('%Y/%m/%d') for x in comment_time_temp]

In [9]:
# Creating Video Comments dataframe
video_comments_df = pd.DataFrame({'date': comment_time_temp,
                                  'video_id': video_id_temp,
                                  'comment': comments_temp,
                                  'author': comment_author_temp,
                                  'likes': like_count_temp,
                                  'reply_flag': reply_flag_temp})
video_comments_df.head()

Unnamed: 0,date,video_id,comment,author,likes,reply_flag
0,2021/07/11,QWeoUS1_Ufw,"Reiss is a great shout mate, I have told my ma...",DeanFagan Music,0,0
1,2021/07/10,QWeoUS1_Ufw,"How many times did he say ""low-key"" 🤣",GARETH IRWIN,0,0
2,2021/07/10,QWeoUS1_Ufw,Great vid mate,adam Johnson,0,0
3,2021/07/10,QWeoUS1_Ufw,This is good I'm saving up for a PS5 and Ninte...,Wolverine Scratch,1,0
4,2021/07/09,QWeoUS1_Ufw,Please do some fake vs real stone island,Will Ctypx,0,0


In [10]:
# Empty list for the results of the Youtube Video API Query
all_statistics_info = []

# Function for the Youtube Video (Statistics-specific) API Query
def main():
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"
    DEVELOPER_KEY = "AIzaSyAm05u8SWGevg6CfVJWXfPw_lST1HlqI8g"

    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, developerKey = DEVELOPER_KEY)
    
    # Looping through all videos in the Video Description Dataframe
    for video in video_id:

        request = youtube.videos().list(
            part="statistics",
            id=video
        )
        response = request.execute()

        all_statistics_info.append(response)

if __name__ == "__main__":
    main()

In [11]:
# Empty Lists which will form the Columns of the Video Statistics Dataframe
views = []
likes = []
dislikes = []
comments_count = []

# Looping through the JSON response to append values to empty column lists
for level in range(len(all_statistics_info)):
    views.append(all_statistics_info[level]['items'][0]['statistics']['viewCount'])
    likes.append(all_statistics_info[level]['items'][0]['statistics']['likeCount'])
    dislikes.append(all_statistics_info[level]['items'][0]['statistics']['dislikeCount'])
    comments_count.append(all_statistics_info[level]['items'][0]['statistics']['commentCount'])

# Creating the Video Statistics Dataframe    
video_statistics_df = pd.DataFrame(list(zip(video_id, views, likes, dislikes, comments_count)),
                                   columns =['video_id','views', 'likes', 'dislikes', 'comments'])

video_statistics_df.head()

Unnamed: 0,video_id,views,likes,dislikes,comments
0,QWeoUS1_Ufw,2276,87,1,10
1,XrzJwUzKh0U,8586,364,4,31
2,3Cg8-eoNXdY,3275,106,8,36
3,zgXoCGUcR9w,4295,173,7,39
4,AFpME2E2GAY,5642,209,3,38


In [12]:
# Empty list for the results of the Youtube Video API Query
all_tags = []

# Function for the Youtube Video (Snippet-specifc) API Query
def main():
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"
    DEVELOPER_KEY = "AIzaSyAm05u8SWGevg6CfVJWXfPw_lST1HlqI8g"

    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, developerKey = DEVELOPER_KEY)
    
    # Looping through all videos in the Video Description Dataframe
    for video in video_id:

        request = youtube.videos().list(
            part="snippet",
            id=video
        )
        response = request.execute()

        all_tags.append(response)

if __name__ == "__main__":
    main()

In [13]:
# Empty Lists which will form the Tag-related Columns of the Video Comments Dataframe
# and will replace the description column in the Video Description Dataframe
# orginal description column does not contain full video descriptions
tags = []
full_descriptions = []

# Looping through the JSON response to append values to empty column lists
for level in range(len(all_tags)):
    # Appending blank values in cases where a tag is not present 
    # as a tag was not present for the video
    try:
        tags.append(all_tags[level]['items'][0]['snippet']['tags'])
    except KeyError as e:
        tags.append([""])
    full_descriptions.append(all_tags[level]['items'][0]['snippet']['description'])

# Replacing video description column in Video Description Dataframe
video_description_df["video_description"] = full_descriptions
video_description_df.head()

# Creating Video Tags Dataframe
video_tags_df = pd.DataFrame(tags)

# Creating column names for Video Tags Dataframe
column_names = [*range(1, video_tags_df.shape[1]+1)]
video_tags_df.columns = column_names

# Adding Video ID Column to Video Tags Dataframe
video_tags_df["video_id"] = video_id

# Restructing Video Tags Dataframe (Unpivotting, Removing Rows with NAs, Sorting Rows)
video_tags_df = video_tags_df.melt(id_vars=['video_id'], var_name='tag_number', value_name='tag_value')
video_tags_df.dropna(axis=0, inplace=True)
video_tags_df.sort_values(by=["video_id", "tag_number"], ascending=[False, True], inplace=True)

video_tags_df.head()

Unnamed: 0,video_id,tag_number,tag_value
69,zjbRIzeGBz8,1,mens fashion
281,zjbRIzeGBz8,2,Gucci hoody
493,zjbRIzeGBz8,3,gucci hoodie review
705,zjbRIzeGBz8,4,mens gucci
917,zjbRIzeGBz8,5,mens givenchy


In [14]:
# Replacing semi colons with commas as it will be delimiter
video_description_df['video_description'] = video_description_df['video_description'].str.replace(';', ',')
video_description_df['video_title'] = video_description_df['video_title'].str.replace(';', ',')
video_comments_df['comment'] = video_comments_df['comment'].str.replace(';', ',')
video_comments_df['author'] = video_comments_df['author'].str.replace(';', ',')

In [15]:
# Removing non ascii characters and newlines, tabs, carriage returns
video_comments_df['comment'] = video_comments_df['comment'].str.encode('ascii', 'ignore').str.decode('ascii')
video_comments_df['comment'].replace(to_replace=[r"\\t|\\n|\\r", "\t|\n|\r"], value=["",""], regex=True, inplace=True)

In [16]:
# Saving Dataframes
video_description_df.to_csv("Video_Description_Table.csv", index=False, sep = ';')
video_statistics_df.to_csv("Video_Statistics_Table.csv", index=False,sep = ';')
video_comments_df.to_csv("Video_Comments_Table.csv", index=False, sep = ';')
video_tags_df.to_csv("Video_Tags_Table.csv", index=False, sep = ';')