In [30]:
import pandas as pd
from dotenv import dotenv_values
from googleapiclient.discovery import build
from IPython.display import JSON

config = dotenv_values(".env")
api_key = config["API_key"]

type(api_key) 



str

In [31]:
channel_ids = ['UC8butISFwT-Wl7EV0hUK0BQ', ]
api_service_name = "youtube"
api_version = "v3"
playlist_id = "UU8butISFwT-Wl7EV0hUK0BQ"

youtube = build(
    api_service_name, api_version, developerKey=api_key)


In [32]:
def get_channel_stats(youtube, channel_ids):
    """
    Get channel stats
    
    Params:
    ------
    youtube: build object of Youtube API
    channel_ids: list of channel IDs
    
    Returns:
    ------
    dataframe with all channel stats for each channel ID
    
    """

    all_data = []

    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=','.join(channel_ids)
    )
    response = request.execute()

    # loop through items
    for item in response['items']:
        data = {'channelName': item['snippet']['title'],
                'subscribers': item['statistics']['subscriberCount'],
                'views': item['statistics']['viewCount'],
                'totalVideos': item['statistics']['videoCount'],
                'playlistId': item['contentDetails']['relatedPlaylists']['uploads']
                }

        all_data.append(data)

    return pd.DataFrame(all_data)


def get_video_ids(youtube, playlist_id):

    video_ids = []

    request = youtube.playlistItems().list(
        part="snippet,contentDetails",
        playlistId=playlist_id,
        maxResults=50
    )
    response = request.execute()

    for item in response['items']:
        video_ids.append(item['contentDetails']['videoId'])

    next_page_token = response.get('nextPageToken')
    while next_page_token is not None:
        request = youtube.playlistItems().list(
            part='contentDetails',
            playlistId=playlist_id,
            maxResults=50,
            pageToken=next_page_token)
        response = request.execute()

        for item in response['items']:
            video_ids.append(item['contentDetails']['videoId'])

        next_page_token = response.get('nextPageToken')

    return video_ids


def get_video_details(youtube, video_ids):

    all_video_info = []

    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute()

        for video in response['items']:
            stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                             'statistics': ['viewCount', 'likeCount', 'favouriteCount', 'commentCount'],
                             'contentDetails': ['duration', 'definition', 'caption']
                             }
            video_info = {}
            video_info['video_id'] = video['id']

            for k in stats_to_keep.keys():
                for v in stats_to_keep[k]:
                    try:
                        video_info[v] = video[k][v]
                    except:
                        video_info[v] = None

            all_video_info.append(video_info)

    return pd.DataFrame(all_video_info)


In [33]:
channel_stats = get_channel_stats(youtube, channel_ids)

In [34]:
channel_stats


Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,freeCodeCamp.org,7800000,547218932,1456,UU8butISFwT-Wl7EV0hUK0BQ


In [35]:
# Get video IDs
video_ids = get_video_ids(youtube, playlist_id)
video_df = get_video_details(youtube, video_ids)
video_df


Unnamed: 0,video_id,channelTitle,title,description,tags,publishedAt,viewCount,likeCount,favouriteCount,commentCount,duration,definition,caption
0,jlogLBkPZ2A,freeCodeCamp.org,"Build AI Apps with ChatGPT, DALL-E, and GPT-4 ...",This course will teach you how to build AI-pow...,,2023-05-30T15:14:01Z,23806,1278,,36,PT4H34M58S,hd,false
1,GnodscC2p-A,freeCodeCamp.org,"Build and Deploy a LeetCode Clone with React, ...","In this project tutorial, you will build a Lee...",,2023-05-25T14:33:36Z,42667,1562,,71,PT7H10M,hd,false
2,qrZGfBBlXpk,freeCodeCamp.org,Django ChatGPT Clone Tutorial,Learn how to use Django and the OpenAI API to ...,,2023-05-24T14:53:19Z,28681,995,,47,PT1H33M32S,hd,true
3,FRmCxj9K7II,freeCodeCamp.org,"Full-Stack Next.js, TypeScript, and AWS Course...","Learn how to use Next.js, TypeScript, and AWS ...",,2023-05-22T15:08:54Z,50682,1928,,50,PT6H6M1S,hd,false
4,GizsSo-EevA,freeCodeCamp.org,Use ChatGPT to Code a Full Stack App – Full Co...,Learn how to use ChatGPT for full stack develo...,,2023-05-18T14:08:51Z,131418,4181,,163,PT2H16M31S,hd,false
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1452,_uWzpyr_5qk,freeCodeCamp.org,Computer Basics 5: How To Measure Data Size,http://FreeCodeCamp.com is a community of busy...,"[Free Code Camp, JavaScript, Pair Programming,...",2015-07-17T08:31:23Z,47695,,,32,PT2M24S,hd,false
1453,xXLj5MbrI44,freeCodeCamp.org,Computer Basics 4: Decoding a Binary Number,http://FreeCodeCamp.com is a community of busy...,"[Free Code Camp, JavaScript, Pair Programming,...",2015-07-17T08:31:20Z,62101,,,33,PT1M42S,hd,true
1454,veugT7A9psY,freeCodeCamp.org,Computer Basics 3: Intro to Binary Code,http://FreeCodeCamp.com is a community of busy...,"[Free Code Camp, JavaScript, Pair Programming,...",2015-07-17T08:31:17Z,52721,,,12,PT1M,hd,true
1455,xj9mFD71Vfc,freeCodeCamp.org,Computer Basics 2: More Computer Hardware,http://FreeCodeCamp.com is a community of busy...,"[Free Code Camp, JavaScript, Pair Programming,...",2015-07-17T08:31:14Z,60667,,,16,PT1M12S,hd,true


In [38]:
video_df[['duration']].sort_values('duration')

Unnamed: 0,duration
406,P0D
187,P1DT12H39M22S
99,P1DT1H37M26S
94,P1DT51M37S
147,P1DT7H54M31S
...,...
1322,PT9M53S
1360,PT9M54S
1006,PT9M56S
1252,PT9M5S


In [29]:
video_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1457 entries, 0 to 1456
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   video_id        1457 non-null   object
 1   channelTitle    1457 non-null   object
 2   title           1457 non-null   object
 3   description     1457 non-null   object
 4   tags            966 non-null    object
 5   publishedAt     1457 non-null   object
 6   viewCount       1457 non-null   object
 7   likeCount       1434 non-null   object
 8   favouriteCount  0 non-null      object
 9   commentCount    1457 non-null   object
 10  duration        1457 non-null   object
 11  definition      1457 non-null   object
 12  caption         1457 non-null   object
dtypes: object(13)
memory usage: 148.1+ KB
