#### Plan: build a YouTube API client instance; given a channel ID, fetch the channel statistics (including the uploads playlist ID); use the playlist ID to get the video IDs; for each video, fetch the comments, the video statistics and the video length.

In [21]:
import pandas as pd
from googleapiclient.discovery import build

In [22]:
def build_youtube(api_key):
    """Return a client for version "v3" of the "youtube" service.

    Args:
        api_key: Developer key forwarded to ``build`` as ``developerKey``.

    Returns:
        The service object produced by ``googleapiclient.discovery.build``.
    """
    return build("youtube", "v3", developerKey=api_key)

In [36]:
def get_channel_stats(youtube, channel_ids):
    """Fetch name, headline statistics and uploads playlist id per channel.

    Args:
        youtube: API client exposing ``channels().list(...).execute()``.
        channel_ids: Sequence of channel id strings.

    Returns:
        A list with one dict per channel found in the responses, with keys
        ``Channel_name``, ``Subscribers``, ``Views``, ``Total_videos`` and
        ``playlist_id``. A statistic missing from the response is reported
        as ``None``. The list is empty when ``channel_ids`` is empty or no
        requested channel is returned.
    """
    all_data = []

    # Request ids in batches of 50, the same batch size get_video_details
    # uses, so a long id list is never sent as one oversized request.
    for start in range(0, len(channel_ids), 50):
        response = youtube.channels().list(
            part='snippet,contentDetails,statistics',
            id=','.join(channel_ids[start:start + 50])
        ).execute()

        # A response without an 'items' key is treated as "no channels"
        # instead of raising KeyError.
        for item in response.get('items', []):
            stats = item.get('statistics', {})
            all_data.append({
                'Channel_name': item['snippet']['title'],
                # NOTE(review): presumably absent when the channel hides its
                # subscriber count - confirm against the API reference.
                'Subscribers': stats.get('subscriberCount'),
                'Views': stats.get('viewCount'),
                'Total_videos': stats.get('videoCount'),
                'playlist_id': item['contentDetails']['relatedPlaylists']['uploads']
            })
    return all_data

In [24]:
def get_video_ids(youtube, playlist_id):
    """Collect the video id of every item in a playlist.

    Pages through ``playlistItems().list`` 50 items at a time, following
    ``nextPageToken`` until a response no longer carries one.

    Args:
        youtube: API client exposing ``playlistItems().list(...).execute()``.
        playlist_id: Id of the playlist to walk.

    Returns:
        List of video id strings in the order the responses returned them.
    """
    collected = []
    page_token = None  # the first request is sent with pageToken=None
    has_more = True

    while has_more:
        page = youtube.playlistItems().list(
            part='contentDetails',
            playlistId=playlist_id,
            maxResults=50,
            pageToken=page_token
        ).execute()

        collected.extend(
            entry['contentDetails']['videoId'] for entry in page['items']
        )

        page_token = page.get('nextPageToken')
        has_more = bool(page_token)

    return collected


In [45]:
def fetch_and_save_comments(youtube, video_ids):
    """Fetch the top-level comments of each video into one DataFrame.

    Despite the name, nothing is written to disk here; the caller decides
    what to do with the returned frame.

    Fetching is best-effort per video: if a request raises, a message is
    printed and the loop moves on to the next video. Comments gathered for
    that video before the error are kept.

    Args:
        youtube: API client exposing ``commentThreads().list(...).execute()``.
        video_ids: Iterable of video id strings.

    Returns:
        A ``pandas.DataFrame`` with columns ``VideoID``, ``Comment``,
        ``Author`` and ``PublishedAt``. The columns are present even when no
        comment was collected, so the frame can always be merged on
        ``VideoID``.
    """
    columns = ['VideoID', 'Comment', 'Author', 'PublishedAt']
    comments = []

    for video_id in video_ids:
        try:
            nextPageToken = None

            while True:
                request = youtube.commentThreads().list(
                    part='snippet',
                    videoId=video_id,
                    maxResults=100,
                    pageToken=nextPageToken
                )
                response = request.execute()

                for item in response.get('items', []):
                    comment = item['snippet']['topLevelComment']['snippet']
                    comments.append({
                        'VideoID': video_id,
                        'Comment': comment['textDisplay'],
                        'Author': comment['authorDisplayName'],
                        'PublishedAt': comment['publishedAt']
                    })

                nextPageToken = response.get('nextPageToken')
                if not nextPageToken:
                    break
        except Exception as e:
            # Deliberately broad: one failing video must not abort the whole
            # crawl. The failure is identified from the error text.
            if 'commentsDisabled' in str(e):
                print(f"Comments are disabled for the video with ID: {video_id}")
            else:
                print(f"Error fetching comments for video with ID {video_id}: {str(e)}")

    # Passing explicit columns keeps the schema stable for an empty result;
    # pd.DataFrame([]) alone would have no 'VideoID' column to merge on.
    comments_df = pd.DataFrame(comments, columns=columns)
    return comments_df




In [26]:
def parse_duration(duration_str):
    """Convert an ISO 8601 duration string to a length in minutes.

    Handles the week, day, hour, minute and second designators, e.g.
    ``'PT4M13S'``, ``'PT1H2M3S'`` or ``'P1DT2H'``. Components that are not
    present count as zero.

    Args:
        duration_str: Duration such as ``'PT1H2M3S'``.

    Returns:
        The duration in minutes as a float (``'PT5M30S'`` -> ``5.5``).

    Raises:
        ValueError: If ``duration_str`` does not match the expected format.
    """
    import re  # local import keeps this definition self-contained

    match = re.fullmatch(
        r'P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?',
        duration_str,
    )
    if match is None:
        raise ValueError(f"Unrecognised ISO 8601 duration: {duration_str!r}")

    weeks, days, hours, minutes, seconds = (
        int(part) if part else 0 for part in match.groups()
    )
    total_seconds = (
        ((weeks * 7 + days) * 24 + hours) * 3600 + minutes * 60 + seconds
    )

    return total_seconds / 60

In [27]:
def get_video_details(youtube, video_ids):
    """Fetch title, publish date, statistics and duration for each video.

    Ids are requested in batches of 50 per ``videos().list`` call.

    Args:
        youtube: API client exposing ``videos().list(...).execute()``.
        video_ids: List of video id strings.

    Returns:
        A list of dicts with keys ``VideoID``, ``Title``, ``Published_date``,
        ``Views``, ``Likes``, ``Comments`` and ``Duration`` (the value
        returned by ``parse_duration``). ``Views`` and ``Likes`` are ``None``
        when the response omits them; ``Comments`` falls back to ``0``.
    """
    all_video_stats = []

    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part='snippet,statistics,contentDetails',
            id=','.join(video_ids[i:i + 50])
        )
        response = request.execute()

        # A response without 'items' simply contributes no rows.
        for video in response.get('items', []):
            statistics = video.get('statistics', {})
            video_stats = {
                'VideoID': video['id'],
                'Title': video['snippet']['title'],
                'Published_date': video['snippet']['publishedAt'],
                # Individual counters can be missing from 'statistics'; use
                # .get so one such video does not abort the whole batch.
                # None (not 0) marks "unknown" rather than a real zero.
                'Views': statistics.get('viewCount'),
                'Likes': statistics.get('likeCount'),
                'Comments': statistics.get('commentCount', 0),
                'Duration': parse_duration(video['contentDetails']['duration'])
            }
            all_video_stats.append(video_stats)

    return all_video_stats




In [41]:
if __name__ == "__main__":
    import os

    # The API key is a credential and must not live in source control: read
    # it from the environment. A key that was previously committed here
    # should be treated as leaked and revoked.
    api_key = os.environ.get('YOUTUBE_API_KEY')
    if not api_key:
        raise SystemExit(
            "Set the YOUTUBE_API_KEY environment variable to a YouTube Data API key."
        )
    channel_ids = ['UCV0qA-eDDICsRR9rPcnG7tw']  # Jomatech

    youtube = build_youtube(api_key)
    channel_statistics = get_channel_stats(youtube, channel_ids)
    if not channel_statistics:
        # Without this guard .loc[0, ...] below fails with an opaque KeyError.
        raise SystemExit(f"No channel data returned for {channel_ids}")
    channel_data = pd.DataFrame(channel_statistics)
    # Only the first channel's uploads playlist is crawled.
    playlist_id = channel_data.loc[0, 'playlist_id']
    video_ids = get_video_ids(youtube, playlist_id)
    # comments_df and video_details are module-level names on purpose: the
    # cells that follow merge and export them.
    comments_df = fetch_and_save_comments(youtube, video_ids)
    video_details = get_video_details(youtube, video_ids)
    

KeyboardInterrupt: 

In [43]:
# Attach each video's details to its comments. The left join keeps every
# comment row even when no details row shares its VideoID.
merged_data = comments_df.merge(
    pd.DataFrame(video_details),
    how='left',
    on='VideoID',
)
# Comment texts as a plain Python list.
comments = merged_data['Comment'].tolist()

In [44]:
# Write the merged comment/video table to disk, leaving out the row index.
merged_data.to_csv("merged_output.csv", index=False)