In [4]:
api_key = 'Your_API_Key'

In [17]:
from googleapiclient.discovery import build
from dateutil import parser
import pandas as pd
from IPython.display import JSON

In [18]:
channel_ids = ['UCoOae5nYA7VqaXzerajD0lg']

In [19]:
api_service_name = "youtube"
api_version = "v3"

# Get credentials and create an API client
youtube = build(
    api_service_name, api_version, developerKey=api_key)

In [20]:
def get_channel_stats(youtube, channel_ids):

    """
    Get channel stats

    Params:
    ------
    youtube: build object of Youtube API
    channel_ids: list of channel IDs

    Returns:
    ------
    dataframe with all channel stats for each channel ID

    """

    all_data = []

    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=','.join(channel_ids)
    )
    response = request.execute()

    # loop through items
    for item in response['items']:
        data = {'channelName': item['snippet']['title'],
                'subscribers': item['statistics']['subscriberCount'],
                'views': item['statistics']['viewCount'],
                'totalVideos': item['statistics']['videoCount'],
                'playlistId': item['contentDetails']['relatedPlaylists']['uploads']
        }

        all_data.append(data)

    return pd.DataFrame(all_data)

def get_video_ids(youtube, playlist_id):

    video_ids = []

    request = youtube.playlistItems().list(
        part="snippet,contentDetails",
        playlistId=playlist_id,
        maxResults = 50
    )
    response = request.execute()

    for item in response['items']:
        video_ids.append(item['contentDetails']['videoId'])

    next_page_token = response.get('nextPageToken')
    while next_page_token is not None:
        request = youtube.playlistItems().list(
                    part='contentDetails',
                    playlistId = playlist_id,
                    maxResults = 50,
                    pageToken = next_page_token)
        response = request.execute()

        for item in response['items']:
            video_ids.append(item['contentDetails']['videoId'])

        next_page_token = response.get('nextPageToken')

    return video_ids

In [21]:
def get_video_details(youtube, video_ids):

    all_video_info = []

    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute()

        for video in response['items']:
            stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                             'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                             'contentDetails': ['duration', 'definition', 'caption']
                            }
            video_info = {}
            video_info['video_id'] = video['id']

            for k in stats_to_keep.keys():
                for v in stats_to_keep[k]:
                    try:
                        video_info[v] = video[k][v]
                    except:
                        video_info[v] = None

            all_video_info.append(video_info)

    return pd.DataFrame(all_video_info)

In [22]:
channel_stats = get_channel_stats(youtube, channel_ids)

In [23]:
channel_stats

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,Ali Abdaal,5400000,413404029,872,UUoOae5nYA7VqaXzerajD0lg


In [24]:
playlist_id = "UUoOae5nYA7VqaXzerajD0lg"

In [25]:
# Get video IDs
video_ids = get_video_ids(youtube, playlist_id)

In [26]:
len(video_ids)

869

In [27]:
# Get video details
video_df = get_video_details(youtube, video_ids)
video_df

Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption
0,IFSMQJCda10,Ali Abdaal,5 Actionable Ways to Become More Self-Disciplined,#ad Let BetterHelp connect you to a therapist ...,"[Ali Abdaal, Ali abdal, how to be more discipl...",2024-04-12T13:00:27Z,19665,1620,,95,PT27M41S,hd,false
1,B7SNUIq6j1w,Ali Abdaal,Emotions Are Crucial In Any Productivity Frame...,Check out my New York Times bestselling book a...,"[Ali Abdaal, Ali abdal]",2024-04-12T12:00:19Z,3747,307,,6,PT39S,hd,false
2,o_F82o-0IoY,Ali Abdaal,Enjoy Every Step Towards Your Goals 🎯,Check out my New York Times bestselling book a...,"[Ali Abdaal, Ali abdal]",2024-04-11T12:00:49Z,12758,1049,,7,PT38S,hd,false
3,5k3uMaJN4IU,Ali Abdaal,Make 2024 The Best Year of Your Life 🎉,Check out my New York Times bestselling book a...,"[Ali Abdaal, Ali abdal, feel good productivity...",2024-04-10T12:00:05Z,12112,705,,6,PT49S,hd,false
4,KMFR0kj0CE0,Ali Abdaal,Level Up Every Area Of Your Life With These Pr...,Check out my New York Times bestselling book a...,"[Ali Abdaal, Ali abdal, productivity, feel goo...",2024-04-09T12:00:17Z,14501,756,,5,PT38S,hd,false
...,...,...,...,...,...,...,...,...,...,...,...,...,...
864,7APfFjfnNBc,Ali Abdaal,Payphone (Maroon 5) - Katherine Macfarland & A...,"First cover of 2017! We both love this song, s...","[payphone, maroon 5, acoustic cover, cover]",2017-01-04T23:13:51Z,93974,2289,,145,PT2M35S,hd,false
865,gBpBLTSCb4s,Ali Abdaal,6med UKCAT Crash Course - In a Nutshell,The 6med UKCAT Crash Course is the most effect...,"[6med, medical school, ukcat, medicine]",2016-05-09T18:45:18Z,16907,124,,6,PT2M11S,hd,false
866,Va0tSdPADpA,Ali Abdaal,6med BMAT Crash Course - In a Nutshell,The 6med BMAT Crash Course is the most effecti...,"[BMAT, 6med, medicine, medical students]",2016-04-27T09:04:49Z,41389,298,,9,PT2M18S,hd,false
867,XJHM0fEH3ss,Ali Abdaal,Rolling in the Deep (Adele) - Duranka Perera &...,Our first 'proper' video! One of our all-time ...,"[rolling in the deep, adele, cover, acoustic]",2016-04-05T09:05:29Z,63587,776,,39,PT2M29S,hd,false


In [28]:
video_df.isnull().any()

video_id          False
channelTitle      False
title             False
description       False
tags               True
publishedAt       False
viewCount         False
likeCount         False
favouriteCount     True
commentCount      False
duration          False
definition        False
caption           False
dtype: bool

In [29]:
video_df.to_csv('dataset.csv', index=False)