Get video IDs from playlists

In [None]:
import os
import pandas as pd
from googleapiclient.discovery import build

#YouTube API key: read from the YOUTUBE_API_KEY environment variable so the
#secret is not stored in the notebook; falls back to an empty string when unset
api_key = os.environ.get('YOUTUBE_API_KEY', '')

#playlist IDs passed to get_video_ids_from_playlists
playlist_ids = ['','']
#text file holding the collected video IDs, one per line
video_ids_file = 'video_ids.txt'
#checkpoint file holding a single video ID used to resume comment collection
last_video_id_file = 'last_video_id.txt'

#API client shared by every request in this notebook
youtube = build('youtube', 'v3', developerKey=api_key)

In [None]:
#function to get video IDs from playlists
def get_video_ids_from_playlists(youtube, playlist_ids, from_date='', to_date=''):
    """Collect the IDs of playlist videos published inside a date window.

    Args:
        youtube: API client exposing ``playlistItems()`` and ``videos()``.
        playlist_ids: iterable of playlist IDs to scan.
        from_date: inclusive lower bound for the video's ``publishedAt``
            value; an empty value means no lower bound.
        to_date: inclusive upper bound for the video's ``publishedAt``
            value; an empty value means no upper bound.

    Returns:
        List of video IDs in the order they were encountered, each ID listed
        once even if it appears in several playlists.

    Notes:
        The bounds are compared with ``publishedAt`` as plain strings, so
        they must use the same timestamp format the API returns (ISO 8601
        according to the API documentation).
        Playlist entries for which ``videos().list`` returns no item (for
        example deleted or private videos) are skipped.
    """
    video_ids = []
    seen_video_ids = set()
    for playlist_id in playlist_ids:
        next_page_token = None
        while True:
            playlist_response = youtube.playlistItems().list(
                part='snippet',
                playlistId=playlist_id,
                pageToken=next_page_token,
                maxResults=50
            ).execute()
            page_video_ids = [
                item['snippet']['resourceId']['videoId']
                for item in playlist_response['items']
            ]
            if page_video_ids:
                #one videos.list request per page instead of one per video
                video_response = youtube.videos().list(
                    part='snippet',
                    id=','.join(dict.fromkeys(page_video_ids))
                ).execute()
                #the response is looked up by ID so playlist order is kept
                published_at_by_id = {
                    video['id']: video['snippet']['publishedAt']
                    for video in video_response.get('items', [])
                }
                for video_id in page_video_ids:
                    video_published_at = published_at_by_id.get(video_id)
                    if video_published_at is None or video_id in seen_video_ids:
                        continue
                    if from_date and video_published_at < from_date:
                        continue
                    if to_date and video_published_at > to_date:
                        continue
                    seen_video_ids.add(video_id)
                    video_ids.append(video_id)
            next_page_token = playlist_response.get('nextPageToken')
            if not next_page_token:
                break
    return video_ids

In [None]:
#write the video IDs into the file video_ids.txt
video_ids = get_video_ids_from_playlists(youtube, playlist_ids)

#stop before touching either file: an empty result cannot seed the checkpoint
#and would otherwise fail below with a bare IndexError after video_ids.txt
#had already been truncated
if not video_ids:
    raise ValueError(
        "No video IDs were collected; check playlist_ids and the date range "
        "used by get_video_ids_from_playlists"
    )

with open(video_ids_file, 'w') as f:
    for video_id in video_ids:
        f.write(video_id + '\n')

#set the first video ID as the last video ID in the last_video_id.txt file
with open(last_video_id_file, 'w') as f:
    f.write(video_ids[0])

Get up to 15k top-level comments from each video ID

In [None]:
comments_file = 'comments.csv'

#get 15k comments from each video ID
def get_comments_from_video(youtube, video_id, from_date='', to_date='', max_comments=15000):
    """Fetch the top-level comments of one video, optionally within a date window.

    Args:
        youtube: API client exposing ``commentThreads()``.
        video_id: ID of the video whose comment threads are read.
        from_date: inclusive lower bound for the comment's ``publishedAt``
            value; an empty value means no lower bound.
        to_date: inclusive upper bound for the comment's ``publishedAt``
            value; an empty value means no upper bound.
        max_comments: maximum number of comments to scan. Comments rejected
            by the date window still count towards this limit, so fewer rows
            than ``max_comments`` may be returned.

    Returns:
        List of dicts with the keys ``Timestamp``, ``Username``, ``VideoID``,
        ``Comment``, ``Date`` and ``LikeCount``. ``Timestamp`` and ``Date``
        both carry the comment's ``publishedAt`` value.

    Notes:
        The bounds are compared with ``publishedAt`` as plain strings, so
        they must use the same timestamp format the API returns.
    """
    comments = []
    next_page_token = None
    comment_count = 0
    while comment_count < max_comments:
        comment_response = youtube.commentThreads().list(
            part='snippet',
            videoId=video_id,
            pageToken=next_page_token,
            textFormat='plainText',
            maxResults=100
        ).execute()
        for item in comment_response['items']:
            comment = item['snippet']['topLevelComment']['snippet']
            comment_published_at = comment['publishedAt']
            #an empty bound is treated as "no limit on that side"
            after_start = not from_date or from_date <= comment_published_at
            before_end = not to_date or comment_published_at <= to_date
            if after_start and before_end:
                comments.append({
                    'Timestamp': comment_published_at,
                    'Username': comment['authorDisplayName'],
                    'VideoID': video_id,
                    'Comment': comment['textDisplay'],
                    'Date': comment_published_at,
                    'LikeCount': comment['likeCount']
                })
            comment_count += 1
            if comment_count >= max_comments:
                break
        next_page_token = comment_response.get('nextPageToken')
        if not next_page_token:
            break
    return comments

In [None]:
#load the stored video IDs, one per line, with surrounding whitespace removed
with open(video_ids_file, 'r') as f:
    video_ids = list(map(str.strip, f))

#load the checkpointed video ID written by the previous step or run
with open(last_video_id_file, 'r') as f:
    last_video_id = f.read().strip()

#position in video_ids from which comment collection starts
#(list.index raises ValueError when the checkpointed ID is not in the list)
last_video_index = video_ids.index(last_video_id)

In [None]:
#loop through video IDs and get comments
#column order matches the dicts returned by get_comments_from_video
comment_columns = ['Timestamp', 'Username', 'VideoID', 'Comment', 'Date', 'LikeCount']

for position in range(last_video_index, len(video_ids)):
    video_id = video_ids[position]
    comments = get_comments_from_video(youtube, video_id)

    #explicit columns keep a proper header row even when the first video
    #processed yields no comments at all
    comments_df = pd.DataFrame(comments, columns=comment_columns)

    #append mode creates the file on first use; the header is written only then
    comments_df.to_csv(
        comments_file,
        mode='a',
        header=not os.path.exists(comments_file),
        index=False
    )

    #checkpoint the NEXT video to process: the resume step starts at the
    #checkpointed ID inclusive, so storing the video just finished would make
    #a resumed run fetch it again and append duplicate rows.
    #After the final video the checkpoint is left on that video, so re-running
    #a completed collection still re-fetches the last video.
    if position + 1 < len(video_ids):
        with open(last_video_id_file, 'w') as f:
            f.write(video_ids[position + 1])

Get video statistics (likes, comment count, view count)

In [None]:
#function to get video statistics
def get_video_stats(youtube, video_id):
    """Return the comment, view and like counters of one video.

    Args:
        youtube: API client exposing ``videos()``.
        video_id: ID of the video to look up.

    Returns:
        Dict with the keys ``comments``, ``views`` and ``likes`` holding the
        values of ``commentCount``, ``viewCount`` and ``likeCount`` as
        returned by the API. A counter that is absent from the response is
        reported as ``None`` instead of raising, so the remaining counters
        are not lost.

    Raises:
        IndexError: if the response contains no item for ``video_id``.
    """
    video_response = youtube.videos().list(
        part='statistics',
        id=video_id
    ).execute()
    items = video_response.get('items') or []
    if not items:
        raise IndexError(f"No video found for ID {video_id}")
    stats = items[0].get('statistics', {})
    #individual counters may be missing from the response (NOTE(review): the
    #API reportedly omits e.g. hidden like counts or disabled comments)
    return {
        'comments': stats.get('commentCount'),
        'views': stats.get('viewCount'),
        'likes': stats.get('likeCount')
    }

In [None]:
#load the stored video IDs, one per line, with surrounding whitespace removed
with open(video_ids_file, 'r') as f:
    video_ids = list(map(str.strip, f))

In [None]:
#build one statistics record per video ID; a failure for one video is
#reported and the loop continues with the next ID
stats = []
for video_id in video_ids:
    try:
        video_stats = get_video_stats(youtube, video_id)
        stats.append(dict(
            video_id=video_id,
            comments=video_stats['comments'],
            views=video_stats['views'],
            likes=video_stats['likes'],
        ))
    except Exception as err:
        print(f"Error getting stats for video {video_id}: {err}")

In [None]:
#save statistics to CSV file
#explicit columns (same order as the collected records) keep the header row
#in place even when no statistics could be collected
df = pd.DataFrame(stats, columns=['video_id', 'comments', 'views', 'likes'])
df.to_csv('video_stats.csv', index=False)