In [1]:
!pip install google-api-python-client

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
from googleapiclient.discovery import build
from dateutil import parser
import pandas as pd
from IPython.display import JSON

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

In [None]:
import pandas as pd
import numpy as np
from dateutil import parser

# Data visualization libraries
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
sns.set(style="darkgrid", color_codes=True)

# Google API
from googleapiclient.discovery import build

In [17]:
api_key = 'AIzaSyARrKkGLFu_DtKlwOEPdmX_vIPm9kP7xiQ'
channel_ids = ['UC4tS4Q_Cno5JVcIUXxQOOpA', ]

In [18]:
api_service_name = "youtube"
api_version = "v3"

# Get credentials and create an API client
youtube = build(
api_service_name, api_version,
developerKey=api_key)

In [19]:
def get_channel_stats(youtube, channel_ids):
    """
    Get channel statistics: title, subscriber count, view count, video count, upload playlist
    Params:
    
    youtube: the build object from googleapiclient.discovery
    channels_ids: list of channel IDs
    
    Returns:
    Dataframe containing the channel statistics for all channels in the provided list: title, subscriber count, view count, video count, upload playlist
    
    """
    all_data = []
    request = youtube.channels().list(
                part='snippet,contentDetails,statistics',
                id=','.join(channel_ids))
    response = request.execute() 
    
    for i in range(len(response['items'])):
        data = dict(channelName = response['items'][i]['snippet']['title'],
                    subscribers = response['items'][i]['statistics']['subscriberCount'],
                    views = response['items'][i]['statistics']['viewCount'],
                    totalVideos = response['items'][i]['statistics']['videoCount'],
                    playlistId = response['items'][i]['contentDetails']['relatedPlaylists']['uploads'])
        all_data.append(data)
    
    return pd.DataFrame(all_data)

def get_video_ids(youtube, playlist_id):
    """
    Get list of video IDs of all videos in the given playlist
    Params:
    
    youtube: the build object from googleapiclient.discovery
    playlist_id: playlist ID of the channel
    
    Returns:
    List of video IDs of all videos in the playlist
    
    """
    
    request = youtube.playlistItems().list(
                part='contentDetails',
                playlistId = playlist_id,
                maxResults = 50)
    response = request.execute()
    
    video_ids = []
    
    for i in range(len(response['items'])):
        video_ids.append(response['items'][i]['contentDetails']['videoId'])
        
    next_page_token = response.get('nextPageToken')
    more_pages = True
    
    while more_pages:
        if next_page_token is None:
            more_pages = False
        else:
            request = youtube.playlistItems().list(
                        part='contentDetails',
                        playlistId = playlist_id,
                        maxResults = 50,
                        pageToken = next_page_token)
            response = request.execute()
    
            for i in range(len(response['items'])):
                video_ids.append(response['items'][i]['contentDetails']['videoId'])
            
            next_page_token = response.get('nextPageToken')
        
    return video_ids

def get_video_details(youtube, video_ids):
    """
    Get video statistics of all videos with given IDs
    Params:
    
    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs
    
    Returns:
    Dataframe with statistics of videos, i.e.:
        'channelTitle', 'title', 'description', 'tags', 'publishedAt'
        'viewCount', 'likeCount', 'favoriteCount', 'commentCount'
        'duration', 'definition', 'caption'
    """
        
    all_video_info = []
    
    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute() 

        for video in response['items']:
            stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                             'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                             'contentDetails': ['duration', 'definition', 'caption']
                            }
            video_info = {}
            video_info['video_id'] = video['id']

            for k in stats_to_keep.keys():
                for v in stats_to_keep[k]:
                    try:
                        video_info[v] = video[k][v]
                    except:
                        video_info[v] = None

            all_video_info.append(video_info)
            
    return pd.DataFrame(all_video_info)

def get_comments_in_videos(youtube, video_ids):
    """
    Get top level comments as text from all videos with given IDs (only the first 10 comments due to quote limit of Youtube API)
    Params:
    
    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs
    
    Returns:
    Dataframe with video IDs and associated top level comment in text.
    
    """
    all_comments = []
    
    for video_id in video_ids:
        try:   
            request = youtube.commentThreads().list(
                part="snippet,replies",
                videoId=video_id
            )
            response = request.execute()
        
            comments_in_video = [comment['snippet']['topLevelComment']['snippet']['textOriginal'] for comment in response['items'][0:10]]
            comments_in_video_info = {'video_id': video_id, 'comments': comments_in_video}

            all_comments.append(comments_in_video_info)
            
        except: 
            # When error occurs - most likely because comments are disabled on a video
            print('Could not get comments for video ' + video_id)
        
    return pd.DataFrame(all_comments)     

In [20]:
channel_stats = get_channel_stats(youtube, channel_ids)

In [21]:
channel_stats

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,Ricis Official,31400000,5725254295,2428,UU4tS4Q_Cno5JVcIUXxQOOpA


In [22]:
playlist_id = "UU4tS4Q_Cno5JVcIUXxQOOpA"
video_ids = get_video_ids(youtube,
playlist_id)

video_df = get_video_details(youtube,
video_ids)
video_df.head()

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption
0,5AkjXMSAdWs,Ricis Official,KHADEEJAH MARYAM GA SABAR NUNGGU RICIS PULANG!...,Video baru : https://youtu.be/LCNIIS9Xuvs\n\nC...,"[vlog, ricis, ricistr, ryantr, ricis ryan, dai...",2023-04-04T08:00:01Z,242061,19574,,6963,PT13M49S,hd,False
1,18HUZEuAg6Q,Ricis Official,DIA DAPAT 1 JUTA KARENA FOLLOW SEMUA???,,,2023-04-04T07:04:10Z,273909,20180,,632,PT42S,hd,False
2,MygBm2J3buo,Ricis Official,Parah Banget Gak Ada Yang Follow..,,,2023-04-03T12:27:40Z,115994,10876,,794,PT58S,hd,False
3,LCNIIS9Xuvs,Ricis Official,MOANA MARAH GA MAU PULANG KE JAKARTA.. Aduh Gi...,Video baru : https://youtu.be/MFTiYr17k48\n\nC...,"[vlog, ricis, ricistr, ryantr, ricis ryan, dai...",2023-04-03T08:00:09Z,282119,21101,,6699,PT13M14S,hd,False
4,7Q1KJjQ-xfE,Ricis Official,Berangkat Umroh.,,,2023-04-02T13:31:42Z,80785,8779,,989,PT1M1S,hd,False


In [23]:
video_df.to_csv(r'ricis.csv', index=False)