In [71]:
##### Retrieve YouTube video details using Python and the YouTube Data API

In [None]:
from googleapiclient.discovery import build
import pandas as pd
import seaborn as sb
import os
import csv

In [None]:
# The API key is read from the environment so the credential is never stored
# in the notebook. Export YOUTUBE_API_KEY before starting the kernel.
# NOTE(review): a key that was previously committed in this cell should be
# treated as exposed and rotated.
api_key = os.environ.get('YOUTUBE_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable to a YouTube Data API key."
    )

# Channel handles whose statistics and uploads are collected further down.
HandleList = ["@SQLBI", "@Tableau", "@Databricks" ]

# Service name and version handed to googleapiclient's `build`.
api_service_name = 'youtube'
api_version = 'v3'

# Client object used by every API helper in this notebook.
ytdata = build(api_service_name, api_version, developerKey=api_key)

In [None]:
## Test Code - get the channel data using googleapi's channel list function - passing Channel ID


def get_channel_stats(ytdata, channel_id):
    """Fetch headline statistics for one channel, looked up by channel ID.

    Calls ``channels().list`` on ``ytdata`` requesting the ``snippet``,
    ``contentDetails`` and ``statistics`` parts, then reads the first item
    of the response.

    Args:
        ytdata: API client exposing ``channels().list(...).execute()``.
        channel_id: Value passed as the ``id`` filter of the request.

    Returns:
        dict with keys ``Channel``, ``Subscribers``, ``Views`` and ``Videos``;
        values are copied from the response without conversion.

    Raises:
        KeyError / IndexError: if the response lacks the expected fields.
    """
    lookup = ytdata.channels().list(
        part="snippet,contentDetails,statistics",
        id=channel_id,
    )
    first_item = lookup.execute()['items'][0]

    # Title lives under the snippet part, the counters under statistics.
    channel_title = first_item['snippet']['title']
    counters = first_item['statistics']

    return {
        'Channel': channel_title,
        'Subscribers': counters['subscriberCount'],
        'Views': counters['viewCount'],
        'Videos': counters['videoCount'],
    }


In [None]:
# NOTE(review): `channel_id` is not assigned in any cell shown in this notebook;
# it has to be defined before this cell can run on a fresh kernel — TODO confirm.
get_channel_stats(ytdata, channel_id)

In [None]:
## Test Code - get the channel data using googleapi's channel list function - passing Channel's Handle
# Sample handle passed to the single-handle lookup in the cells below.
Handle = "@SQLBI"

def get_handle_stats(ytdata, Handle):
    """Return the raw ``channels().list`` response for a single channel handle.

    This is the exploratory variant: it hands back the whole response so its
    structure can be inspected. A later cell in this notebook defines a
    function with the same name that takes a list of handles.

    Args:
        ytdata: API client exposing ``channels().list(...).execute()``.
        Handle: Channel handle (for example ``"@SQLBI"``) passed as the
            ``forHandle`` filter.

    Returns:
        The response object returned by ``execute()``, unchanged.
    """
    request = ytdata.channels().list(
        part="snippet,contentDetails,statistics",
        forHandle=Handle,
    )
    # No fields are extracted here: the summary dict that used to be built was
    # never returned, and building it raised KeyError for responses without
    # items, which prevented looking at exactly those responses.
    return request.execute()

In [None]:
# Display what get_handle_stats returns for the sample handle.
get_handle_stats(ytdata, Handle)

In [None]:
## Process code from here - get the channel data using googleapi's channel list function - passing multiple Channel Handles

def get_handle_stats(ytdata, HandleList):
    """Collect summary statistics for every channel handle in ``HandleList``.

    One ``channels().list`` request (parts ``snippet``, ``contentDetails``,
    ``statistics``) is issued per handle and the first item of each response
    is summarised.

    Args:
        ytdata: API client exposing ``channels().list(...).execute()``.
        HandleList: Iterable of channel handles such as ``"@SQLBI"``.

    Returns:
        list of dicts, one per handle and in input order, with keys
        ``Channel``, ``ChannelID``, ``Subscribers``, ``Views``, ``Videos``
        and ``PlaylistID``. Counts are left as returned by the API;
        ``Subscribers`` is ``None`` when the response carries no
        ``subscriberCount``.

    Raises:
        KeyError: if a handle yields no channel; the message names the handle.
    """
    all_ytdata = []

    for handle in HandleList:
        request = ytdata.channels().list(
            part="snippet,contentDetails,statistics",
            forHandle=handle,
        )
        response = request.execute()

        # A handle that matches nothing comes back without usable items; fail
        # with a message that says which handle was at fault.
        items = response.get('items')
        if not items:
            raise KeyError("No channel found for handle {!r}".format(handle))

        channel = items[0]
        statistics = channel['statistics']

        ytinfo = dict(
            Channel=channel['snippet']['title'],
            ChannelID=channel['id'],
            # NOTE(review): channels may hide their subscriber count, in which
            # case the field is presumably absent — hence .get(); confirm
            # against the API reference.
            Subscribers=statistics.get('subscriberCount'),
            Views=statistics['viewCount'],
            Videos=statistics['videoCount'],
            # The uploads playlist ID can be used to list all videos uploaded
            # to this channel.
            PlaylistID=channel['contentDetails']['relatedPlaylists']['uploads'],
        )
        all_ytdata.append(ytinfo)

    return all_ytdata

In [None]:
# Gather one stats record per handle in HandleList, then tabulate the records.
channel_stats = get_handle_stats(ytdata, HandleList)
channel_data = pd.DataFrame(channel_stats)

In [None]:
# Convert the three count columns to numeric dtypes in a single assignment.
channel_data[['Subscribers', 'Views', 'Videos']] = (
    channel_data[['Subscribers', 'Views', 'Videos']].apply(pd.to_numeric)
)
#channel_data.dtypes
channel_data
#ChannelID and PlaylistID seem to give the same details

In [None]:
## Verifying the values 

# Uploads playlist ID stored on the row whose Channel value is 'SQLBI'.
playlistid1 = channel_data.loc[channel_data['Channel']=='SQLBI', 'PlaylistID'].iloc[0]
# Not the last expression in the cell, so this result is not displayed.
type(playlistid1)
# Displayed cell output: the Channel value of the first row.
channel_data.iloc[0]["Channel"]

In [None]:
def get_video_ids(ytdata, playlistid):
    """List the video IDs of every item in a playlist.

    Pages through ``playlistItems().list`` 50 items at a time, following
    ``nextPageToken`` until the response no longer carries one.

    Args:
        ytdata: API client exposing ``playlistItems().list(...).execute()``.
        playlistid: Value passed as the ``playlistId`` filter.

    Returns:
        list of ``contentDetails.videoId`` values in the order received.
    """
    collected = []
    page_token = None

    while True:
        query = dict(part="contentDetails", playlistId=playlistid, maxResults=50)
        # The first request carries no pageToken at all; later ones pass the
        # token handed back by the previous page.
        if page_token is not None:
            query["pageToken"] = page_token

        page = ytdata.playlistItems().list(**query).execute()
        collected.extend(entry['contentDetails']['videoId'] for entry in page['items'])

        page_token = page.get('nextPageToken')
        if page_token is None:
            return collected

In [None]:
def get_video_details(ytdata, video_ids, channel=None):
    """Fetch title, publish time and engagement counts for a list of videos.

    Args:
        ytdata: API client exposing ``videos().list(...).execute()``.
        video_ids: List of video ID strings.
        channel: Optional channel name. When given, every returned record
            gets a leading ``Channel`` key with this value. Defaults to
            ``None`` (no ``Channel`` key), matching the two-argument form.

    Returns:
        list of dicts with keys ``Title``, ``Video_id``,
        ``Published_datetime``, ``Views``, ``Likes``, ``Favorites`` and
        ``Comments`` (plus ``Channel`` when requested). A count that is
        missing from the response is reported as ``None`` instead of
        raising ``KeyError``.
    """
    all_video_stats = []

    # IDs are sent in batches of 50 per request; step through the full list.
    for start in range(0, len(video_ids), 50):
        request = ytdata.videos().list(
            part="snippet,statistics",
            id=','.join(video_ids[start:start + 50]),
        )
        response = request.execute()

        for video in response.get('items', []):
            snippet = video['snippet']
            # Individual counters (e.g. likeCount, commentCount) can be absent
            # for a video, so they are read with .get().
            statistics = video.get('statistics', {})

            video_stats = {}
            if channel is not None:
                video_stats['Channel'] = channel
            video_stats['Title'] = snippet['title']
            video_stats['Video_id'] = video['id']
            video_stats['Published_datetime'] = snippet['publishedAt']
            video_stats['Views'] = statistics.get('viewCount')
            video_stats['Likes'] = statistics.get('likeCount')
            video_stats['Favorites'] = statistics.get('favoriteCount')
            video_stats['Comments'] = statistics.get('commentCount')

            all_video_stats.append(video_stats)

    return all_video_stats

In [None]:
## Loop through all the PlaylistIDs of the Channels to get the entire list of Video Ids. 

def get_video_data(channel_data, ytdata):
    """Build one DataFrame of video details across every channel.

    For each row of ``channel_data`` the uploads playlist is expanded into
    video IDs with ``get_video_ids`` and those IDs are resolved with
    ``get_video_details``.

    Args:
        channel_data: DataFrame with at least ``PlaylistID`` and ``Channel``
            columns.
        ytdata: API client forwarded to the helper functions.

    Returns:
        DataFrame with one row per video: a ``Channel`` column followed by
        the fields returned by ``get_video_details``. Empty when
        ``channel_data`` has no rows.
    """
    records = []

    for _, row in channel_data.iterrows():
        playlist_id = row["PlaylistID"]
        channel = row["Channel"]

        # Get video IDs for the current playlist
        video_ids = get_video_ids(ytdata, playlist_id)

        # get_video_details is called with (ytdata, video_ids) only; the
        # channel name is attached here so each row records its origin.
        for details in get_video_details(ytdata, video_ids):
            tagged = {"Channel": channel}
            tagged.update(details)
            records.append(tagged)

    # Build the frame once from all records rather than concatenating a new
    # DataFrame on every loop iteration.
    return pd.DataFrame(records)

In [None]:
# Fetch video-level details for every channel listed in channel_data.
video_data = get_video_data(channel_data, ytdata)

In [None]:
# Calendar date derived from the publish timestamp column.
video_data['Published_date'] = pd.to_datetime(video_data['Published_datetime']).dt.date
# Convert the four count columns to numeric dtypes in a single assignment.
video_data[['Views', 'Likes', 'Favorites', 'Comments']] = (
    video_data[['Views', 'Likes', 'Favorites', 'Comments']].apply(pd.to_numeric)
)
video_data

In [None]:
# Switch the working directory to ../Data, resolved relative to the current
# working directory; the CSV written in the next cell uses a relative path.
os.chdir("../Data")
# Print the resulting working directory as confirmation.
print(os.getcwd()) 

In [None]:
# Write the video table to YTVideoDetails.csv in the current working directory,
# opening the file in write mode (mode='w') and leaving out the index column.
video_data.to_csv("YTVideoDetails.csv", mode='w', index=False)