<a href="https://colab.research.google.com/github/melindasiburian/Scraping-youtube-comments-using-API/blob/main/Scraping_code_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os

import pandas as pd
from googleapiclient.discovery import build


def get_all_replies(comment_id, youtube):
    """Collect every reply under one top-level comment, following page tokens.

    Args:
        comment_id: ID of the parent (top-level) comment, passed as ``parentId``.
        youtube: Client object exposing ``comments().list(...).execute()``.

    Returns:
        A list of rows ``[publishedAt, authorDisplayName, textDisplay,
        likeCount, "Reply"]``, in the order the pages return them.
    """
    # Arguments shared by the first request and every follow-up page request.
    base_query = {'part': 'snippet', 'parentId': comment_id, 'maxResults': 100}
    rows = []

    page = youtube.comments().list(**base_query).execute()
    while page:
        for entry in page['items']:
            details = entry['snippet']
            rows.append([
                details['publishedAt'],
                details['authorDisplayName'],
                details['textDisplay'],
                details['likeCount'],
                "Reply",
            ])

        # No continuation token means this was the last page.
        if 'nextPageToken' not in page:
            break
        page = youtube.comments().list(pageToken=page['nextPageToken'], **base_query).execute()

    return rows


def _comment_row(snippet, kind):
    """Flatten one comment snippet into an output row tagged with *kind*."""
    return [
        snippet['publishedAt'],
        snippet['authorDisplayName'],
        snippet['textDisplay'],
        snippet['likeCount'],
        kind,
    ]


def video_comments(video_id, api_key):
    """Fetch all top-level comments and their replies for one video.

    Pages through ``commentThreads().list`` for the video. Each top-level
    comment is emitted as a row, followed by its replies. Replies embedded in
    the thread are used directly when they are complete; ``get_all_replies``
    is only called when the thread reports more replies than were embedded,
    which avoids one extra API request per thread.

    Args:
        video_id: ID of the YouTube video whose comments are fetched.
        api_key: Developer key passed to ``build`` for the YouTube v3 client.

    Returns:
        A list of rows ``[publishedAt, authorDisplayName, textDisplay,
        likeCount, type]`` where ``type`` is ``"Comment"`` or ``"Reply"``.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)

    # Arguments shared by the first request and every follow-up page request.
    base_query = {'part': 'snippet,replies', 'videoId': video_id, 'maxResults': 100}
    comments_data = []

    video_response = youtube.commentThreads().list(**base_query).execute()
    while video_response:
        for item in video_response.get('items', []):
            thread = item['snippet']
            top_level = thread['topLevelComment']
            comments_data.append(_comment_row(top_level['snippet'], "Comment"))

            if 'replies' not in item:
                continue

            embedded = item['replies'].get('comments', [])
            reply_total = thread.get('totalReplyCount')
            if reply_total is None or reply_total > len(embedded):
                # The embedded list is truncated (or its completeness is
                # unknown), so page through the full reply list instead.
                comments_data.extend(get_all_replies(top_level['id'], youtube))
            else:
                # NOTE(review): reply order now follows the embedded list
                # rather than comments.list — confirm nothing depends on it.
                comments_data.extend(
                    _comment_row(reply['snippet'], "Reply") for reply in embedded
                )

        # No continuation token means this was the last page of threads.
        if 'nextPageToken' not in video_response:
            break
        video_response = youtube.commentThreads().list(
            pageToken=video_response['nextPageToken'],
            **base_query
        ).execute()

    return comments_data


# The API key is read from the environment so the credential is never stored
# in (or committed with) this file. Set YOUTUBE_API_KEY before running.
api_key = os.environ.get("YOUTUBE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable to your YouTube Data API key."
    )

# Video ID example: https://www.youtube.com/watch?v=qwWfBoteiy8
video_id = "qwWfBoteiy8"

# Fetch every comment and reply row for the video
comments = video_comments(video_id, api_key)

# Build a DataFrame whose columns match the row layout returned above
df = pd.DataFrame(comments, columns=['publishedAt', 'authorDisplayName', 'textDisplay', 'likeCount', 'Type'])

# Save to CSV (index column omitted)
df.to_csv('youtube-comments-with-replies.csv', index=False)