# Youtube API (for DSI Project - Youtube EDA)

Youtube API https://developers.google.com/youtube/v3

Youtube API tutorial by https://www.youtube.com/watch?v=D56_Cx36oGY&t=457s&ab_channel=ThuVudataanalytics

## Importing libraries and setting api parameters

In [1]:
from googleapiclient.discovery import build
import pandas as pd
from IPython.display import JSON

In [2]:
# API client setup.
# Prefer the YOUTUBE_API_KEY environment variable so the real key does not have to be
# stored in the notebook; the placeholder below is used only when it is not set.
import os

api_key = os.environ.get('YOUTUBE_API_KEY', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
api_service_name = "youtube"
api_version = "v3"
# Service object passed as the `youtube` argument to every API helper below
youtube = build(api_service_name, api_version, developerKey=api_key)

## List of selected channel IDs

In [3]:
# Channel IDs selected for this project; the trailing comment on each entry is
# the channel name as noted by the author.
channel_ids = [
    "UCSPfJ89aYsRhpDSnZ0vaFCg",  # PointyFloppy
    "UCULyOKD5Y9QTKFDikg2UDgw",  # Mccan Agility
    "UCC17PDLl8y1dP2BRV5vuseg",  # Dogs That - Susan Garett
    "UCqw_HGVtWKcd-owWODwcQsg",  # Fanny Gott
    "UCrz5NqpcLxpiy1Wm_ksEV3g",  # AbsoluteDog
    "UCMF2E-huI3T7meqJBU-olZg",  # OneMindDogs
    "UCxXnIktVdXrSFCE8u1D-26w",  # Iwona Golab
    "UCZzFRKsgVMhGTxffpzgTJlQ",  # Zak George
    "UC-qnqaajTk6bfs3UZuue6IQ",  # Kikopup
    "UCEaf-PDONb_nhyNRmVw9_nA",  # Peak Performer
    "UCxsOQBtCjUhYS-j8VYt4KFQ",  # Agile Dog Training
    "UCwLs3Ldbl1Ox-fvupUq7ipA",  # Jenny Damm
    "UCmLiE85jsHD7tSsfGUoFd1A",  # Teresa Kralova
    "UCortXxJdZYbHCrOa3nddr6g",  # Larry Krohn
    "UCGBhmRCsK-hdGyKUP-K02zw",  # Peter Caine Dog Training
    "UC6vdCX3-G6oDGajvQFreLLA",  # Stonnie Dennis
    "UCRyHBkIJ90SZNRyq1iVda5g",  # Training Positive
    "UC5QwYlOxcT1higtcJVGzCCg",  # Mccan Dog Training
    "UC6z0E8nSfCvelwA3bon_phg",  # Will Atherton Canine Training
    "UCL67xKDkCjEjIC0O0M51MJw",  # Ogilvie Dogs
    "UC4szPGNKGADcR2X_TNp1l7g",  # Fenrir BC
    "UCDMdcpyb3Hp3bQCIIv8HBLQ",  # Fenrir Canine Show
    "UCuOmWJkaAAgP2gMgiLvRSIg",  # Beckman's Dog Training
    "UCsthsKj5MJ7zPvOtncW8_6w",  # The Canine Coach Dog Training
    "UCbupaU5Pekkp-vz681zGPEw",  # Bright Dog Academy Training
    "UCqiW2BB9Aed5ITI__nTLPFQ",  # Saro Dog Training
    "UC9K_VHBK7r5FyXiJYIfgLNA",  # Sarah Walsh
    "UCgyXYNBsex79zG-uV4upHbw",  # My Dog Training Spot
    "UCLL9mXGZIRLcSpZiLBxRT0Q",  # Rayvon Dog Training
    "UCbwQj676SdnAzTRggTQwJVw",  # Andy Krueger Dog Training
]

## Creating API functions

#### Get channel stats

In [4]:
def get_channel_stats(youtube,channel_ids):
    """
    Gets channel stats and upload playlist id's from channel_id's

    Parameters
    ----------
    youtube : google api build instance
        build function instance with setup parameters (from googleapiclient.discovery)
    channel_ids : str or list of str
        channel id or list of channel ids from youtube

    Returns
    -------
    DataFrame
        one row per channel returned by the API, with columns: channel_title, channel_desc,
        channel_loc, channel_pub_date, channel_view, channel_sub, channel_video, channel_playlistId
        channel_playlistId = upload playlist id that we will need to extract all videos in this channel
        channel_loc and channel_sub are None when the API response omits them

    Raises
    ------
    KeyError
        if a returned channel lacks one of the other fields read below
    """

    # A bare string must be wrapped, otherwise ','.join would split it into characters
    if isinstance(channel_ids, str):
        channel_ids = [channel_ids]
    else:
        channel_ids = list(channel_ids)

    all_data = []

    # Request in batches of 50 ids, the same batch size get_video_details uses
    for start in range(0, len(channel_ids), 50):
        request = youtube.channels().list(
            part="snippet,contentDetails,statistics",
            id=','.join(channel_ids[start:start+50])
        )
        response = request.execute()

        # 'items' is read defensively so a response without it yields no rows
        for item in response.get('items', []):
            snippet = item['snippet']
            statistics = item['statistics']

            all_data.append(dict(
                channel_title = snippet['title'],
                channel_desc = snippet['description'],
                # country is optional in the response: missing -> None
                channel_loc = snippet.get('country'),
                channel_pub_date = snippet['publishedAt'],
                channel_view = statistics['viewCount'],
                # presumably absent when a channel hides its subscriber count - missing -> None
                channel_sub = statistics.get('subscriberCount'),
                channel_video = statistics['videoCount'],
                channel_playlistId = item['contentDetails']['relatedPlaylists']['uploads'],
            ))

    return pd.DataFrame(all_data)
    

#### Get video IDs from upload playlist IDs

In [5]:
def get_video_ids(youtube,playlist_id):
    """
    Gets video_id's from an upload playlist id, following pagination to the last page

    Parameters
    ----------
    youtube : google api build instance
        build function instance with setup parameters (from googleapiclient.discovery)
    playlist_id : str
        upload playlist id of a youtube channel

    Returns
    -------
    list
        a list of video_ids in the playlist, in the order the API returns them
        (empty when the playlist has no items)
    """

    video_ids = []
    next_page_token = None

    # One loop handles the first page and every following page
    while True:
        # Only contentDetails is requested because videoId is the only field read
        params = dict(part="contentDetails", playlistId=playlist_id, maxResults=50)
        if next_page_token is not None:
            params['pageToken'] = next_page_token

        response = youtube.playlistItems().list(**params).execute()

        video_ids.extend(item['contentDetails']['videoId'] for item in response.get('items', []))

        # No nextPageToken in the response means this was the last page
        next_page_token = response.get('nextPageToken')
        if next_page_token is None:
            break

    return video_ids

#### Get video details

In [6]:
def get_video_details(youtube, video_ids):
    """
    Gets video stats for each of the given video_ids

    Parameters
    ----------
    youtube : google api build instance
        build function instance with setup parameters (from googleapiclient.discovery)
    video_ids : str or list of str
        video id or list of video ids from youtube

    Returns
    -------
    DataFrame
        one row per video returned by the API, with columns: video_id, channelTitle, title,
        description, tags, publishedAt, duration, definition, caption, viewCount, likeCount,
        favoriteCount, commentCount
        a field missing from the response is stored as None
    """

    # A bare string must be wrapped, otherwise it would be sliced and joined per character
    if isinstance(video_ids, str):
        video_ids = [video_ids]
    else:
        video_ids = list(video_ids)

    # Response part -> fields copied from that part (same for every video, so built once)
    stats_to_keep = {'snippet':['channelTitle','title','description','tags','publishedAt'],
                     'contentDetails':['duration','definition','caption'],
                     'statistics':['viewCount','likeCount','favoriteCount','commentCount']
                    }

    all_video_info = []

    # Request the videos in batches of 50 ids
    for start in range(0,len(video_ids),50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[start:start+50])
        )
        response = request.execute()

        for video in response.get('items', []):
            video_info = {'video_id': video['id']}

            for part, fields in stats_to_keep.items():
                # A missing part falls back to {} so every field in it becomes None
                part_data = video.get(part) or {}
                for field in fields:
                    video_info[field] = part_data.get(field)

            all_video_info.append(video_info)

    return pd.DataFrame(all_video_info)

## Get channel data

In [7]:
# Fetch the stats DataFrame (including each channel's uploads playlist id) for the selected channels
channel_data = get_channel_stats(youtube,channel_ids)

In [8]:
#export file as csv for other session
# NOTE(review): no index=False here, so the DataFrame index is written as an extra first
# column, unlike the video_df export at the end of the notebook - confirm this is intended
channel_data.to_csv('channel_df.csv')

## For each channel's video IDs - get all video data and all comment data

In [9]:
#Getting videos
# Walk the channel rows directly (title + uploads playlist id) instead of looking each
# playlist up by title, so two channels that share a title are both fetched.
# Per-channel frames are collected in a list and concatenated once at the end.
video_frames = []
total_channels = channel_data.shape[0]
channel_rows = zip(channel_data['channel_title'], channel_data['channel_playlistId'])

for i, (c, playlist_id) in enumerate(channel_rows, start=1):
    print(f"Getting video info from channel no. {i}/{total_channels}: {c}") #showing process status

    #get video ids
    video_ids = get_video_ids(youtube,playlist_id)

    #get video data
    video_data = get_video_details(youtube,video_ids)

    #collect data
    video_frames.append(video_data)

# pd.concat raises on an empty list, so fall back to an empty frame when there are no channels
video_df = pd.concat(video_frames) if video_frames else pd.DataFrame()

Getting video info from channel no. 1/30: Agile Dog Training
Getting video info from channel no. 2/30: Jenny Damm
Getting video info from channel no. 3/30: Zak George’s Dog Training Revolution
Getting video info from channel no. 4/30: OneMindDogs
Getting video info from channel no. 5/30: Sarah Walsh - Dog Trainer
Getting video info from channel no. 6/30: BrightDog Academy Dog Training
Getting video info from channel no. 7/30: Larry Krohn
Getting video info from channel no. 8/30: Iwona Golab
Getting video info from channel no. 9/30: Peter Caine Dog Training 
Getting video info from channel no. 10/30: McCann Dog Training
Getting video info from channel no. 11/30: absoluteDogs
Getting video info from channel no. 12/30: Ogilvie Dogs
Getting video info from channel no. 13/30: Fenrir Border Collie Show
Getting video info from channel no. 14/30: Andy Krueger Dog Training 
Getting video info from channel no. 15/30: Tereza Králová
Getting video info from channel no. 16/30: Beckman's Dog Trainin

## Exporting as csv for future reference

In [10]:
# Write the combined video DataFrame to video_df.csv; index=False keeps the row index out of the file
video_df.to_csv('video_df.csv',index=False)