In [None]:
import json
import requests
import pandas as pd
import importlib
from requests_oauthlib import OAuth2Session
import keys

In [None]:
def loadIn():
    """
    Read the saved OAuth token from token.json and parse it as JSON.

    The path is relative, so the file is looked up in the current
    working directory.

    Parameters:
    None

    Return Values:
    The parsed contents of token.json (the other functions in this
    notebook index it like a dictionary, e.g. token['access_token'])

    Raises:
    FileNotFoundError if token.json does not exist, and
    json.JSONDecodeError if its contents are not valid JSON
    """
    # The context manager closes the file handle even when parsing fails;
    # the previous version left the handle open.
    with open('token.json', mode='r') as token_file:
        return json.load(token_file)

In [None]:
def keychain():
    """
    Reload the keys module and build an OAuth2 session from its 'youtube' entry.

    Parameters:
    None

    Return Values:
    A tuple of three values: keys.keychain, an OAuth2Session created from
    the youtube client_id, scope and first redirect URI, and the youtube
    token_uri (used by callers as the refresh url)
    """
    # Re-import keys so the values read below come from the current keys.py
    importlib.reload(keys)
    all_keys = keys.keychain
    youtube = all_keys['youtube']

    app_id = youtube['client_id']
    scopes = youtube['scope']
    # Only the first registered redirect URI is used
    callback = youtube['redirect_uris'][0]
    token_endpoint = youtube['token_uri']

    oauth = OAuth2Session(app_id, scope=scopes, redirect_uri=callback)
    return all_keys, oauth, token_endpoint

In [None]:
def createUrl(token,country=""):
    """
    Request the first page of the most popular videos from the youtube api.

    Despite its name, this function does more than build a url: it sends
    the GET request and returns the decoded response together with the
    query parameters and endpoint, so the caller can request further pages.

    Parameters:
    token: a valid authorization token (its 'access_token' entry is sent)
    country: optionally a region code to gather local data; sent as
        regionCode only when it is not the empty string

    Return Values:
    A tuple of the decoded json response, the dictionary of query
    parameters that was sent, and the endpoint url
    """
    D = {
        'access_token': token['access_token'],
        # A one-element list; requests encodes it like the plain string
        'part': ['snippet,contentDetails,statistics'],
        'maxResults': '50',
        'chart': 'mostPopular',
    }
    # No page token is sent for the first page. An earlier version added a
    # hard-coded 'PageToken' entry; the API's paging parameter is spelled
    # 'pageToken', and callers add it themselves when they request later pages.

    # Only restrict the region when a country was specified
    if country != "":
        D["regionCode"] = country

    url = 'https://www.googleapis.com/youtube/v3/videos'
    # Query the youtube api and decode the json body of the response
    response = requests.get(url, params=D)
    popular_vid = response.json()

    return popular_vid, D, url

In [None]:
def arrange(data,vid,country=""):
    """
    Append the fields of each video to the column lists in data.

    The dictionary is modified in place and also returned, so it can
    later be passed into a data frame.

    Parameters:
    data: a dictionary of lists with the keys 'videoId', 'channelId',
        'categoryId', 'channelTitle' and 'publishedAt'
    vid: the gathered video items
    country: optionally the country the data was gathered from; when
        given, it is recorded once per video under 'CountryCode'

    Return Values:
    The same dictionary, with one new entry per video in every column
    """
    tag_country = country != ""
    # Create the country column the first time a country is supplied
    if tag_country and "CountryCode" not in data:
        data["CountryCode"] = []

    for video in vid:
        data['videoId'].append(video['id'])
        snippet = video['snippet']
        for field in ('channelId', 'categoryId', 'channelTitle'):
            data[field].append(snippet[field])
        # Characters 11-12 of the timestamp are kept (the hour in an
        # ISO-8601 string such as 2020-01-02T13:45:00Z)
        data['publishedAt'].append(snippet['publishedAt'][11:13])
        if tag_country:
            data['CountryCode'].append(country)
    return data

In [None]:
def getData(token,country=""):
    """
    This function, with the use of several helper functions, 
    queries the youtube api, retrieves data, and sorts it into a dictionary.
    This function is the parent function of createUrl and arrange.
    
    Parameters:
    A valid authorization token, and optionally a country to gather local data 
    
    Return Values:
    A dictionary
    """
    data = {'videoId':[], 'channelId':[],'categoryId':[] ,'channelTitle':[], 'publishedAt':[]}
    page = []
    popular_vid,D,url=createUrl(token,country)
    #Uses the helper function createUrl to get information from the youtube api
    
    for i in range(20):
        if 'nextPageToken' not in popular_vid:
            next_page = 'CAUQAA'
        else:
            next_page = popular_vid['nextPageToken']
    #creates a for loop to loop through consecutive pages of information from the youtube api
    #using the nextPageToken
        
        page.append(next_page)
        D['pageToken'] = next_page
        popular_vid = requests.get(url, params = D)
    #requests new pages of information from the youtube api by appending the url
        
        popular_vid = popular_vid.json()
    return arrange(data,popular_vid['items'],country)

In [None]:
def refreshToken(token):
    """
    Exchange the refresh token stored in an expired token for a new token.

    Parameters:
    token: an expired token that still holds a 'refresh_token' entry

    Return Value:
    The new token returned by the OAuth2 session
    """
    # The helper function keychain supplies the client credentials, the
    # OAuth2 session, and the url the refresh request is sent to
    all_keys, oauth, refresh_url = keychain()
    youtube = all_keys['youtube']

    refresh_args = {
        'client_id': youtube['client_id'],
        'client_secret': youtube['client_secret'],
        'refresh_token': token['refresh_token'],
    }
    # Ask the session for a refreshed token so access can continue
    return oauth.refresh_token(refresh_url, **refresh_args)

In [None]:
def categories(token,country=""):
    """
    This function queries the youtube api for the names of each categoryID

    Parameters:
    token: a valid authorization token (its 'access_token' entry is sent)
    country: optionally a region code to gather local data; when it is
        the empty string, 'US' is used

    Return Value:
    The decoded json response
    """
    url = 'https://www.googleapis.com/youtube/v3/videoCategories'
    C = {
        'access_token': token['access_token'],
        'part': 'snippet',
        # 'US' is only the fallback; previously it overwrote the requested
        # country unconditionally, so the country argument had no effect
        'regionCode': country if country != "" else 'US',
    }
    category_vid = requests.get(url, params=C)
    return category_vid.json()

In [None]:
def categoryTable(json):
    """
    Collect the id and title of every category item into two parallel lists.

    Parameters:
    json: the category items; each one must provide item['id'] and
        item['snippet']['title']

    Return Values:
    A dictionary with the keys 'id' and 'title', each holding a list with
    one entry per item, in the order the items were given
    """
    # Note: the parameter name hides the json module inside this function;
    # it is kept because it is part of the function's signature.
    # The items are walked once so one-shot iterables work as well.
    rows = [(entry['id'], entry['snippet']['title']) for entry in json]
    return {
        'id': [category_id for category_id, _ in rows],
        'title': [title for _, title in rows],
    }

In [None]:
def merge(token,df,country=""):
    """
    This function, with the use of several helper functions,
    queries the youtube api for the category names, merges them onto the
    given dataframe so every video gets its category label, and writes
    the result to a .csv file.

    Parameters:
    token: a valid authorization token
    df: a dataframe with a 'categoryId' column
    country: optionally a country; it is forwarded to the category query
        and used in the name of the output file

    Return Values:
    None. Exports the merged data to 'df_<country>.csv'
    """
    # A KeyError means the response had no 'items' entry, which is treated
    # as an invalid token: refresh it once and retry
    try:
        df1 = _category_frame(token, country)
    except KeyError:
        df1 = _category_frame(refreshToken(token), country)

    # Default (inner) join: rows of df whose categoryId has no matching
    # category id are not part of the result
    result = pd.merge(df, df1, left_on='categoryId', right_on='id')
    result.to_csv('df_'+country+'.csv')


def _category_frame(token, country):
    """Query the category names and return them as an id/title dataframe."""
    category_vid = categories(token, country)
    return pd.DataFrame(categoryTable(category_vid['items']))

In [None]:
def main(country=""):
    """
    This function queries the youtube api for data about their current
    most popular videos, and exports that information in the form of
    a .csv file.

    Parameters:
    country: optionally a country to gather local data

    Return Values:
    None. Exports a .csv file with the data (written by merge)
    """
    token = loadIn()

    # getData raises KeyError when the response has no 'items' entry, which
    # is treated as an invalid token: refresh it once and retry
    try:
        data = getData(token, country)
    except KeyError:
        # Keep the refreshed token so merge does not start out with the
        # stale one and have to refresh a second time
        token = refreshToken(token)
        data = getData(token, country)

    df = pd.DataFrame(data)
    merge(token, df, country)

In [None]:
# Run the whole pipeline with 'FR' as the country; the merged result is
# written to df_FR.csv in the current working directory.
main('FR')