In [51]:
#%%writefile 'retreive_youtube_videos.py'

"""

This script allows a user to enter a YouTube channel ID and retrieve a csv file containing the video titles for each
playlist.

This script requires `build`, 'Numpy', 'Pandas', 'Selenium' and 'BeautifulSoup' to be installed within the Python
environment you are running this script in.

Plan:

1) user_input for channel ID
2) Scrape list of playlist IDs for this channel
3) For each playlist, retrieve the list of video titles it contains
4) Allow user to filter by playlist name
5) Allow user to download the .csv (use utils)
6) Final outputs: dictionary of dataframes (2 columns  ), and a single csv file (downloaded...utils) containing
    all the data (video title, playlist title) together
7) You can get the program to return all deleted videos (i.e. videos that were found in a previously returned file
   , but not returned this time...)

"""

import os

import numpy as np
import pandas as pd
from apiclient.discovery import build

# CHECK CONVENTION FOR WHERE TO DEFINE THESE
api_key = 'AIzaSyClBWQExtc0jYq8-xfmc69u_uAA-0HbDNI'

youtube = build('youtube', 'v3', developerKey = api_key)

channel_id = 'UC8AaEH9rzM0dAxP_3vGdpmg'

In [94]:
def main():
    
    user_input = None
    
    while user_input not in {'yes', 'y'}:
        
        user_input = input('Do you have a YouTube API key? [yes/no]')
        
        if user_input.lower() in {'n', 'no'}:
            print('\n You will need to get an API key. This is a very quick process. \
            \n\n Please follow the instructions here: \
            \n https://developers.google.com/youtube/registering_an_application')
    
    youtube = None
    
    while youtube == None:
        user_input = input('Pleade enter your API key:')
        
        try: 
            youtube = build('youtube', 'v3', developerKey = user_input)
        
        except:
            print('Key invalid.')
    
    user_input = True
    
    while user_input:
        user_input = input('Please enter your Channel ID. This can be found by signing in to YouTube and looking at URL of the \'Your channel\' page. It can also be found by clicking on \'Advanced settings\'. \n\n')
    
        # Run custom function to get playlists for given channel_id
        try:
            df_playlists = get_playlists(user_input)
            print('\nDownloading videos...')
            user_input = False
            
        except:
            print('Channel ID invalid.')
    
    # Run custom function to get videos for each playlist
    video_data = [get_videos(i) for i in df_playlists['Playlist ID']]
    
    # Concatenate each dataframe containing videos for each playlist
    df = pd.concat(video_data)
    
    # Sort alphabetically by playist name, but preseve order of videos within each playlist
    df = df.rename_axis('Index').sort_values(by = ['Playlist Name', 'Index'], ascending = [True, True])
    
    # Clean final table (drop any duplicates retreived, reset index)
    df = df.drop_duplicates().reset_index(inplace = False)
    df.drop(columns = ['Index'], inplace = True)
    df.reset_index(inplace = True, drop = True)
    
    print(df)
    
    print('\nDownload complete!')
    
        
    user_input = input('Would you like to download the result as a .csv file? [yes/no]')
    
    if user_input.lower() in {'y', 'yes'}:
        df.to_csv("./my_youtube_videos.csv", index = False)
        print('File downloaded!')
    
    return df

In [89]:
def get_playlists(channel_id):

    channel_request = youtube.playlists().list(part = 'snippet', channelId = channel_id).execute()
    
    next_page_token = channel_request.get('nextPageToken')
    
    while 'nextPageToken' in channel_request:
    
        next_page = youtube.playlists()\
        .list(part = 'snippet',channelId = channel_id, pageToken = next_page_token).execute()
    
        channel_request['items']  += next_page['items']
    
        if 'nextPageToken' not in next_page:
            channel_request.pop('nextPageToken')
        
        else:
            next_page_token = next_page['nextPageToken']
    
    playlists = [i['snippet']['title'] for i in channel_request.get('items')]
    
    playlist_ids = [i['id'] for i in channel_request.get('items')]
    
    df = pd.DataFrame({'Playlist Name':pd.Series(playlists), 'Playlist ID': pd.Series(playlist_ids)})
    
    return df

In [90]:
def get_videos(playlist_id):
    
    playlist_request = youtube.playlistItems().list(part = 'snippet', playlistId = playlist_id).execute()
    
    next_page_token = None
    
    while 'nextPageToken' in playlist_request:
    
        next_page = youtube.playlistItems()\
        .list(part = 'snippet', playlistId = playlist_id, pageToken = next_page_token ).execute()
    
        playlist_request['items']  += next_page['items']
    
        if 'nextPageToken' not in next_page:
            playlist_request.pop('nextPageToken')
        
        else:
            next_page_token = next_page['nextPageToken']
            
    #videos = list(set([i['snippet']['title'] for i in playlist_request.get('items')]))
    
    videos = [i['snippet']['title'] for i in playlist_request.get('items')] 
    
    # Get playlist name for given playlist_id, using the custom get_playlists() function
    playlist_col = get_playlists(channel_id)
    playlist_col = playlist_col[playlist_col['Playlist ID'] == playlist_id]['Playlist Name']
    playlist_col= playlist_col.values[0]
    
    # Create df for videos, and set every value of playlist Name as the same
    df = pd.DataFrame({'Playlist Name':playlist_col, 'Video Title':pd.Series(videos)})
    
    return df

In [95]:
# Start the interactive workflow only when this code is run directly, not when
# it is imported as a module.
if __name__ == '__main__':
    main()

Do you have a YouTube API key? [yes/no]y
Pleade enter your API key:<redacted>
Please enter your Channel ID. This can be found by signing in to YouTube and looking at URL of the 'Your channel' page. It can also be found by clicking on 'Advanced settings'. 

UC8AaEH9rzM0dAxP_3vGdpmg

Downloading videos...




     Playlist Name                                     Video Title
0       Acid House        Gesloten Cirkel -  Zombie Machine (Acid)
1       Acid House                         Smallpeople - Black Ice
2       Acid House          Smallpeople -Down Over Me [Smallville]
3       Acid House                                  Lizz - Crampeu
4       Acid House                  B. Unknown Artist - B [VWV001]
...            ...                                             ...
1369       Vintage                Inner City - Big Fun (12' remix)
1370       Vintage        Make It Last Forever (with Jacci McGhee)
1371       Vintage  The Brothers Johnson - Stomp! (Official Video)
1372       Vintage                    90's Ecstasy Clubland Part 1
1373       Vintage                 Big Hair and Groovy 80s dancing

[1374 rows x 2 columns]

Download complete!
Would you like to download the result as a .csv file? [yes/no]yes
File downloading...
File downloaded!


In [43]:
import os

In [44]:
# Show the working directory, i.e. where relative paths such as
# ./my_youtube_videos.csv resolve to.
os.getcwd()

'/Users/samirkhonji/retrieve-youtube-videos'

In [96]:
# Scratch frame: a single 'Gas' column holding three 'na' strings.
df = pd.DataFrame({'Gas':pd.Series(['na', 'na', 'na'])})

In [98]:
# NOTE(review): this writes the scratch frame to the same path main() uses for
# its export, so running this cell replaces a previously exported video list.
df.to_csv("./my_youtube_videos.csv", index = False)