YouTube comment retrieval using the YouTube Data API

In [None]:
pip install google-api-python-client


In [None]:

# googleapiclient.discovery is used to interact with Google APIs.
# re is the regular-expression module, used for pattern matching (finding text that fits a given pattern); here it locates the video ID inside the video URL.
# time module is used for adding delays in the code.

from googleapiclient.discovery import build
import re
import time

#This function takes a YouTube video URL as input and returns the video ID extracted from it

def video_id(url: str) -> str:
    """Extract the 11-character ``video_id`` from a YouTube url.

    The ID is the first run of exactly 11 characters from ``[0-9A-Za-z_-]``
    that directly follows either ``v=`` or a ``/`` in ``url``.

    Args:
        url: A YouTube video URL, e.g. ``https://youtu.be/<id>`` or
            ``https://www.youtube.com/watch?v=<id>``.

    Returns:
        The 11-character video ID.

    Raises:
        ValueError: If no video ID can be found in ``url``.
    """
    # The negative lookahead requires the ID to end after 11 characters, so a
    # longer token (such as the host name "youtube-nocookie") is not truncated
    # into a bogus ID.
    match = re.search(r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])", url)
    if match is None:
        # Fail with a clear message instead of an AttributeError on ``None``.
        raise ValueError(f"Could not find a YouTube video ID in url: {url!r}")
    return match.group(1)

"""get_comments function:

This function fetches comments from a YouTube video using YouTube Data API. Here's how it works:

It takes two arguments: video_url (the URL of the YouTube video) and api_key (your API key to access the YouTube Data API).
It extracts the video ID from the provided video URL using the video_id function.
It builds a YouTube API service object using the build function from the googleapiclient.discovery module.
It initializes an empty list all_comments to store all the comments retrieved from the video.
It starts a while loop to fetch comments page by page because YouTube API paginates comments.
Inside the loop:
It makes a request to the YouTube Data API to fetch comments for the given video ID.
It extends the all_comments list with the comments retrieved in the response.
It updates the next_page_token with the token for the next page of comments, if available.
If there are no more pages of comments (next_page_token is None), it breaks out of the loop.
It adds a one-second delay to avoid hitting rate limits.
If an exception occurs during the API request, it prints an error message and breaks out of the loop.
Finally, it returns the list of all comments retrieved."""



def get_comments(video_url, api_key):
    """Fetch all comment threads of a YouTube video via the YouTube Data API.

    Pages through ``commentThreads.list`` until the response no longer carries
    a ``nextPageToken``.

    Args:
        video_url: URL of the YouTube video; the video ID is extracted from it
            with ``video_id``.
        api_key: API key handed to ``build`` as ``developerKey``.

    Returns:
        A list with the ``items`` of every response page, in the order the API
        returned them. If a request fails, the error is printed and the items
        collected up to that point are returned (best effort).
    """
    extracted_video_id = video_id(video_url)
    youtube = build('youtube', 'v3', developerKey=api_key)
    all_comments = []

    next_page_token = None
    while True:
        # Only the network call is guarded, so the best-effort handler does not
        # also hide unrelated programming errors.
        try:
            response = youtube.commentThreads().list(
                part="snippet",
                videoId=extracted_video_id,
                textFormat="plainText",
                # Request the largest page the API allows (default is 20):
                # fewer requests, fewer one-second pauses, less quota used.
                maxResults=100,
                pageToken=next_page_token,
            ).execute()
        except Exception as e:
            print(f"An error occurred: {e}")
            break

        all_comments.extend(response.get('items', []))
        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break
        # Add a delay between pages to manage rate limits
        time.sleep(1)

    return all_comments

#It calls the get_comments function with a YouTube video URL and an API key, and stores the returned comments in the comments variable.

# Example usage: fetch every comment thread for one video.
example_video_url = 'https://youtu.be/53IBdnidxtc?si=4hst2ysY5swtjh3z'
example_api_key = 'you_data_api_key'  # placeholder; replace with a real YouTube Data API key
comments = get_comments(example_video_url, example_api_key)

# Report how many comment threads came back.
print(f"Total comments retrieved: {len(comments)}")

# Optional: uncomment to print the text of each top-level comment, one per line.
#for comment in comments:
   # print(comment['snippet']['topLevelComment']['snippet']['textDisplay'])


Total comments retrieved: 8387
