In [2]:
# Define the API settings
CLIENT_SECRETS_FILE = "client_secret.json"

SCOPES = ['https://www.googleapis.com/auth/youtube.force-ssl']
API_SERVICE_NAME = 'youtube'
API_VERSION = 'v3'

# Import necessary modules, mostly google apiclient and
# google authentication related modules
import os
import pickle
import google.oauth2.credentials
 
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
 
# function that authenticates with the credentials related
# to Google APIs and returns a service that can be used to
# interact with YouTube API

def get_authenticated_service():
    credentials = None
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            credentials = pickle.load(token)
    #  Check if the credentials are invalid or do not exist
    if not credentials or not credentials.valid:
        # Check if the credentials have expired
        if credentials and credentials.expired and credentials.refresh_token:
            credentials.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                CLIENT_SECRETS_FILE, SCOPES)
            credentials = flow.run_console()

        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(credentials, token)

    return build(API_SERVICE_NAME, API_VERSION, credentials = credentials)



In [3]:
# Get the service
os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
youtube = get_authenticated_service()

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=385788974097-fdba3lngr32rqs8vc5271ps9csd0o4v7.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fyoutube.force-ssl&state=ynhv5yImA2HdvOS8JhQCRU51dY0zp1&prompt=consent&access_type=offline
Enter the authorization code: 4/1AY0e-g4GmwsTH4mv1YN70xz9Jz7uDBIY9s4ZwH4dYzycy8gUEiNzC0ztf2Y


### extract comments by video id (useful for taking comments of only one video)

In [4]:
data_dict = dict()
i = 0
video_id = "xWrtgnCTmj4" 
#video_title = item['snippet']['title']
video_title = "Trump delivers remarks in Pennsylvania"

first_page = True
page_token = None
nextPageExist = True
while nextPageExist:
    if first_page:
        comment_threads = youtube.commentThreads().list(part="snippet",
                                                        maxResults = 50,
                                                        videoId = video_id).execute()
        first_page = False

    else:
        comment_threads = youtube.commentThreads().list(part="snippet",
                                                        maxResults = 50,
                                                        pageToken = page_token,
                                                        videoId = video_id).execute()

    for comment_thread in comment_threads['items']:
        comment = comment_thread['snippet']['topLevelComment']['snippet']['textDisplay']
        comment_id = comment_thread['snippet']['topLevelComment']['id']
        like_count = comment_thread['snippet']['topLevelComment']['snippet']['likeCount']
        publish_date = comment_thread['snippet']['topLevelComment']['snippet']['publishedAt']
        data_dict[i] = {'video_id':video_id,
                        'video_title':video_title,
                        'comment_id':comment_id,
                        'comment_date':publish_date,
                        'comment':comment,
                        'like_count':like_count}
        i += 1

    if 'nextPageToken' in comment_threads:
        nextPageExist = True
        page_token = comment_threads['nextPageToken']
    else:
        nextPageExist = False
    

In [5]:
import pandas as pd

comments_df = pd.DataFrame.from_dict(data_dict, "index")
comments_df.head()

Unnamed: 0,video_id,video_title,comment_id,comment_date,comment,like_count
0,xWrtgnCTmj4,Trump delivers remarks in Pennsylvania,UgzxZ38VIvSD1USwIwt4AaABAg,2021-01-27T12:06:05Z,Joe has NEVER had this kind of support....CHEATER,1
1,xWrtgnCTmj4,Trump delivers remarks in Pennsylvania,Ugw-_qckxbOHF47Qzp14AaABAg,2021-01-25T15:08:59Z,A true great president!,1
2,xWrtgnCTmj4,Trump delivers remarks in Pennsylvania,UgwhdHVTiioGpOgzMLN4AaABAg,2021-01-25T10:14:26Z,I wonder what YouTube is going to do with this...,1
3,xWrtgnCTmj4,Trump delivers remarks in Pennsylvania,UgzUCVagqt-mc2kbVV14AaABAg,2021-01-24T10:57:46Z,Is the white house now a old peoples home and ...,0
4,xWrtgnCTmj4,Trump delivers remarks in Pennsylvania,Ugwo2zs4vM4b0hIBjQB4AaABAg,2021-01-24T02:21:26Z,Que Viva el Precidente Donald Trump,0


In [6]:
comments_df['video_id'].unique()

array(['xWrtgnCTmj4'], dtype=object)

In [7]:
comments_df.shape

(5736, 6)

In [7]:
#comments_df.to_csv('comments.csv')

In [10]:
#comments_df.to_csv("trump_rally_comments.csv",index=False)

In [7]:
#comments_df.to_csv("biden_victory_comments.csv",index=False)

In [8]:
#comments_df.to_csv("trump_pennsylvania_comments.csv",index=False)

In [13]:
#comments_df.to_csv("biden_inauguration_comments.csv",index=False)