In [3]:
# Define the API settings
CLIENT_SECRETS_FILE = "client_secret.json"

SCOPES = ['https://www.googleapis.com/auth/youtube.force-ssl']
API_SERVICE_NAME = 'youtube'
API_VERSION = 'v3'

# Import necessary modules, mostly google apiclient and
# google authentication related modules
import os
import pickle
import google.oauth2.credentials
 
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
 
# function that authenticates with the credentials related
# to Google APIs and returns a service that can be used to
# interact with YouTube API

def get_authenticated_service():
    credentials = None
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            credentials = pickle.load(token)
    #  Check if the credentials are invalid or do not exist
    if not credentials or not credentials.valid:
        # Check if the credentials have expired
        if credentials and credentials.expired and credentials.refresh_token:
            credentials.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                CLIENT_SECRETS_FILE, SCOPES)
            credentials = flow.run_console()

        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(credentials, token)

    return build(API_SERVICE_NAME, API_VERSION, credentials = credentials)


In [4]:
# Get the service
os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
youtube = get_authenticated_service()

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=385788974097-fdba3lngr32rqs8vc5271ps9csd0o4v7.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fyoutube.force-ssl&state=4s0MltqfaLdyzNMoUVk3KEuS5wafvI&prompt=consent&access_type=offline
Enter the authorization code: 4/1AY0e-g4PdKM3X5NC86qpVlZi0cElNTBZwHSs4VvcYNqBAdJR_kukHNhtj8A


### getting playlist id for the channel

In [8]:
channel_content = youtube.channels().list(part="contentDetails",forUsername="voxdotcom").execute()
playlist_id = channel_content['items'][0]['contentDetails']['relatedPlaylists']['uploads']

### extract last 10 videos but all top comment threads by using paging

In [14]:
videos = youtube.playlistItems().list(part="snippet",
                                      maxResults=50,
                                      playlistId=playlist_id).execute()
data_dict = dict()
i = 0
for item in videos['items']:
    video_id = item['snippet']['resourceId']['videoId']
    video_title = item['snippet']['title']
    
    
    first_page = True
    page_token = None
    nextPageExist = True
    while nextPageExist:
        if first_page:
            comment_threads = youtube.commentThreads().list(part="snippet",
                                                            maxResults = 50,
                                                            videoId = video_id).execute()
            first_page = False
        
        else:
            try:
                comment_threads = youtube.commentThreads().list(part="snippet",
                                                                maxResults = 50,
                                                                pageToken = page_token,
                                                                videoId = video_id).execute()
            except:
                print(' a problem happened while trying get comments from video',video_title)
                pass
            
        for comment_thread in comment_threads['items']:
            comment = comment_thread['snippet']['topLevelComment']['snippet']['textDisplay']
            comment_id = comment_thread['snippet']['topLevelComment']['id']
            like_count = comment_thread['snippet']['topLevelComment']['snippet']['likeCount']
            publish_date = comment_thread['snippet']['topLevelComment']['snippet']['publishedAt']
            data_dict[i] = {'video_id':video_id,
                            'video_title':video_title,
                            'comment_id':comment_id,
                            'comment_date':publish_date,
                            'comment':comment,
                            'like_count':like_count}
            i += 1
        
        if 'nextPageToken' in comment_threads:
            nextPageExist = True
            page_token = comment_threads['nextPageToken']
        else:
            nextPageExist = False
    

In [15]:
len(data_dict)

98392

In [16]:
import pandas as pd

comments_df = pd.DataFrame.from_dict(data_dict, "index")
comments_df.head()

Unnamed: 0,video_id,video_title,comment_id,comment_date,comment,like_count
0,hqvOcr0uu9o,The warning signs before the Capitol riot,UgzlBWVMqlnbq4mLjyR4AaABAg,2021-01-20T12:32:30Z,I don&#39;t want to hear blue lived matter any...,0
1,hqvOcr0uu9o,The warning signs before the Capitol riot,UgwufKze5PKyKDAsUpp4AaABAg,2021-01-20T12:17:48Z,The problem is nor left or right. The problem ...,0
2,hqvOcr0uu9o,The warning signs before the Capitol riot,UgzHHHu2fTgXVJXAULJ4AaABAg,2021-01-20T12:15:15Z,These people have to see or know what they hav...,0
3,hqvOcr0uu9o,The warning signs before the Capitol riot,UgynTmOfFaEaW4-I7GB4AaABAg,2021-01-20T12:13:17Z,<b>Are we forgetting democrats trying to impea...,0
4,hqvOcr0uu9o,The warning signs before the Capitol riot,Ugzl9y87AjTFPDHotdl4AaABAg,2021-01-20T11:00:05Z,AmeriKKKa,0


In [18]:
comments_df['video_id'].unique().shape

(50,)

In [7]:
#comments_df.to_csv('comments.csv')

In [20]:
comments_df.to_csv("comments_vox.csv",index=False)

## References
- [Extracting YouTube Comments with YouTube API & Python](https://python.gotrained.com/youtube-api-extracting-comments/)
- [API Reference](https://developers.google.com/youtube/v3/docs) 