In [6]:
import getpass
from pathlib import Path

import googleapiclient.errors
import pandas as pd
from googleapiclient.discovery import build

USER INPUT

In [4]:
# Playlists whose videos will be scanned for comments.
playlist_ids = ['PLIivdWyY5sqJwPaahO6caM6u_OYFYgLQL']

# Ask for the key interactively instead of hardcoding it in the notebook.
api_key = getpass.getpass(prompt='Please enter your YouTube API key: ')

Please enter your YouTube API key:  ········


In [7]:
# Create the YouTube Data API v3 client, authenticated with the key entered above.
youtube = build(
    serviceName='youtube',
    version='v3',
    developerKey=api_key,
)

GET VIDEO IDs FOR PLAYLISTS

In [8]:
def get_all_video_ids_from_playlists(youtube, playlist_ids):
    """Return the video IDs of every item in the given playlists.

    Each playlist is read page by page through
    ``youtube.playlistItems().list`` (``maxResults=50``) until a response
    carries no ``nextPageToken``.

    Args:
        youtube: YouTube Data API client exposing ``playlistItems()``.
        playlist_ids: Iterable of playlist IDs to read.

    Returns:
        list: Video IDs in the order they were returned, one playlist after
        another, collected into a single flat list.
    """
    collected_ids = []

    for current_playlist in playlist_ids:
        page_token = None  # None asks the API for the first page
        has_more_pages = True

        while has_more_pages:
            response = youtube.playlistItems().list(
                part='contentDetails',
                playlistId=current_playlist,
                maxResults=50,
                pageToken=page_token,
            ).execute()

            for entry in response['items']:
                collected_ids.append(entry['contentDetails']['videoId'])

            # A missing token means this was the playlist's last page.
            page_token = response.get('nextPageToken')
            has_more_pages = page_token is not None

    return collected_ids

# Gather the IDs of every video in the configured playlists; the
# comment-collection step later in the notebook iterates over this list.
video_ids = get_all_video_ids_from_playlists(
    youtube=youtube,
    playlist_ids=playlist_ids,
)

In [9]:
# Show the collected video IDs as the cell output.
video_ids

['y0ZTg2Ckjz4', 'fEzEM6ADwXg', 'fJlo7LsG4Mw', 'ObUeheBTBjY', '9xAfEUSjE5o']

Get All Comments

In [12]:
def get_replies(youtube, parent_id, video_id):
    """Collect every reply posted under one top-level comment.

    Pages through ``youtube.comments().list`` for ``parent_id``
    (``maxResults=100``, plain-text format) until a response carries no
    ``nextPageToken``.

    Args:
        youtube: YouTube Data API client exposing ``comments()``.
        parent_id: ID of the comment whose replies are requested.
        video_id: Value stored in the ``VideoID`` field of each row; it is
            not sent to the API.

    Returns:
        list[dict]: One dict per reply with the keys ``Timestamp``,
        ``Username``, ``VideoID``, ``Comment`` and ``Date``.
    """
    rows = []
    page_token = None  # None asks the API for the first page
    more_pages = True

    while more_pages:
        response = youtube.comments().list(
            part="snippet",
            parentId=parent_id,
            textFormat="plainText",
            maxResults=100,
            pageToken=page_token,
        ).execute()

        rows.extend(
            {
                'Timestamp': snippet['publishedAt'],
                'Username': snippet['authorDisplayName'],
                'VideoID': video_id,
                'Comment': snippet['textDisplay'],
                # Use the update time when present, else the publish time.
                'Date': snippet.get('updatedAt', snippet['publishedAt']),
            }
            for snippet in (reply['snippet'] for reply in response['items'])
        )

        # An absent or empty token ends the pagination.
        page_token = response.get('nextPageToken')
        more_pages = bool(page_token)

    return rows

def get_comments_for_video(youtube, video_id):
    """Collect all comments of one video, replies included.

    Pages through ``youtube.commentThreads().list`` (``maxResults=100``,
    plain-text format). Every thread contributes its top-level comment and,
    when ``totalReplyCount`` is positive, the rows returned by
    ``get_replies`` for that comment.

    Args:
        youtube: YouTube Data API client exposing ``commentThreads()``.
        video_id: ID of the video; also stored in each row's ``VideoID``.

    Returns:
        list[dict]: One dict per comment with the keys ``Timestamp``,
        ``Username``, ``VideoID``, ``Comment`` and ``Date``. The replies of
        a thread directly follow its top-level comment.
    """
    def as_row(snippet):
        # Flatten one comment snippet into the row layout used by the table.
        return {
            'Timestamp': snippet['publishedAt'],
            'Username': snippet['authorDisplayName'],
            'VideoID': video_id,
            'Comment': snippet['textDisplay'],
            # Use the update time when present, else the publish time.
            'Date': snippet.get('updatedAt', snippet['publishedAt']),
        }

    collected = []
    page_token = None  # None asks the API for the first page

    while True:
        response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            pageToken=page_token,
            textFormat="plainText",
            maxResults=100,
        ).execute()

        for thread in response['items']:
            thread_info = thread['snippet']
            top_level = thread_info['topLevelComment']
            collected.append(as_row(top_level['snippet']))

            # Replies are fetched with a separate request per thread.
            if thread_info['totalReplyCount'] > 0:
                collected.extend(get_replies(youtube, top_level['id'], video_id))

        # An absent or empty token ends the pagination.
        page_token = response.get('nextPageToken')
        if not page_token:
            return collected

# Collect the comments (top-level and replies) of every video into one list.
all_comments = []

for video_id in video_ids:
    try:
        video_comments = get_comments_for_video(youtube, video_id)
    except googleapiclient.errors.HttpError as error:
        # A video with comments turned off is answered with HTTP 403 and the
        # reason "commentsDisabled". Skip that video so it does not abort the
        # whole run; every other API failure (quota, invalid key, ...) is
        # re-raised unchanged.
        if error.resp.status == 403 and b'commentsDisabled' in error.content:
            print('Skipping video {}: comments are disabled.'.format(video_id))
            continue
        raise
    all_comments.extend(video_comments)

# Build the table once from the full list of rows. Naming the columns keeps
# the schema stable even when no comments were collected at all.
comments_df = pd.DataFrame(
    all_comments,
    columns=['Timestamp', 'Username', 'VideoID', 'Comment', 'Date'],
)

In [13]:
# Preview the first 10 rows of the comments table.
comments_df.head(10)

Unnamed: 0,Timestamp,Username,VideoID,Comment,Date
0,2023-07-11T21:40:58Z,@googlecloudtech,y0ZTg2Ckjz4,Subscribe to Google Cloud Tech → https://goo.g...,2023-07-11T21:40:58Z
1,2024-03-01T15:35:24Z,@kappiv2356,y0ZTg2Ckjz4,Is not the LIMIT runs after query processing i...,2024-03-01T15:35:24Z
2,2023-07-11T21:30:11Z,@googlecloudtech,fEzEM6ADwXg,Subscribe to Google Cloud Tech → https://goo.g...,2023-07-11T21:30:11Z
3,2023-07-16T15:26:51Z,@SyedSajjadAskari,fEzEM6ADwXg,Why is pricing in gcp is difficult to understand,2023-07-16T15:26:51Z
4,2023-07-12T03:42:44Z,@Kondaranjith3,fEzEM6ADwXg,Thank you Google cloud tech from India @ Ranji...,2023-07-12T03:42:44Z
5,2023-07-12T18:13:52Z,@googlecloudtech,fEzEM6ADwXg,We're happy to hear you found this content use...,2023-07-12T18:13:52Z
6,2023-07-11T23:23:42Z,@himanshujaiswal9411,fEzEM6ADwXg,Himanshu Jaiswal from India,2023-07-11T23:23:42Z
7,2023-07-11T21:43:22Z,@googlecloudtech,fJlo7LsG4Mw,Subscribe to Google Cloud Tech → https://goo.g...,2023-07-11T21:43:22Z
8,2023-07-29T16:44:38Z,@Andhruduful,fJlo7LsG4Mw,You have provided huge amount of information i...,2023-07-29T16:44:38Z


Output to CSV

In [14]:
# Write the collected comments to disk. The target folder is created first
# because DataFrame.to_csv does not create missing directories and would
# otherwise fail when 'Documents/' is absent from the working directory.
output_path = Path('Documents/youtube_playlistvideo_comments.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
comments_df.to_csv(output_path)