## Config

In [82]:
# Import dependencies
import tekore as tk
import os
import time
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta

# Spotify API credentials come from the environment (None when a variable is unset)
CLIENTID = os.getenv('CLIENTID')
CLIENTSECRET = os.getenv('CLIENTSECRET')
# Global cutoff used when filtering songs: 25 years before this cell is run
MINDATE = datetime.now() - relativedelta(years=25)


In [34]:
### Authenticate Tekore
# Request a client token using the credentials from the config cell
app_token = tk.request_client_token(client_id=CLIENTID, client_secret=CLIENTSECRET)
# Build the Spotify client that the query functions below call
spotify = tk.Spotify(token=app_token)

## Read in list of artists

In [137]:
# Read in list of artists
artists = pd.read_csv('artists.csv')
# Subset for testing. The explicit copy makes the subset an independent frame,
# so adding columns to it later does not raise SettingWithCopyWarning.
artists = artists[:5].copy()
artists

Unnamed: 0,artistName
0,21 Savage
1,3 Doors Down
2,311
3,A Day To Remember
4,A Tribe Called Quest


## Get artist IDs

In [138]:
# Create function to search spotify based on artist name
def searchSpotify(row, delay=2):
    """Look up the Spotify artist ID for one row of the artists table.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the artist's name under the key ``'artistName'``.
    delay : float, optional
        Seconds to pause after the request, whether it succeeded or not, so
        we do not exceed our API call allowance. Defaults to 2.

    Returns
    -------
    str or float
        ID of the first search result, or ``np.nan`` when the request fails
        or returns no artists.

    Notes
    -----
    Only ``Exception`` subclasses are turned into ``np.nan``;
    ``KeyboardInterrupt`` and ``SystemExit`` propagate so that a long run
    can still be stopped by interrupting the kernel.
    """
    try:
        # Search spotify for artists based on name; the trailing comma unpacks
        # the single-element result returned for the one requested type
        possibleArtists, = spotify.search(row['artistName'], types=['artist'], limit=1)
        # Select the first search result (IndexError on an empty page is
        # handled below like any other failed lookup)
        return possibleArtists.items[0].id
    except Exception:
        return np.nan
    finally:
        # Pause only; returning from `finally` would swallow every exception
        time.sleep(delay)
    


In [139]:
# Find an artistID for each artist based on artistName.
# assign() builds a new frame instead of writing a column into the existing one,
# so this is safe (no SettingWithCopyWarning) even when `artists` is a slice of
# another frame.
artists = artists.assign(artistID=artists.apply(searchSpotify, axis=1))
artists

Unnamed: 0,artistName,artistID
0,21 Savage,1URnnhqYAYcrqrcwql10ft
1,3 Doors Down,2RTUTCvo6onsAnheUk3aL9
2,311,41Q0HrwWBtuUkJc7C1Rp6K
3,A Day To Remember,4NiJW4q9ichVqL1aUsgGAN
4,A Tribe Called Quest,09hVIj6vWgoCDtT03h8ZCa


## Get top songs for each artist

In [140]:
# Define function that will get the top tracks for each artist in the list
def fetchTopSongs(row, delay=2, maxSongs=3):
    """Return the URIs of an artist's top tracks released after ``MINDATE``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the Spotify artist ID under the key ``'artistID'``.
    delay : float, optional
        Seconds to pause after the request, whether it succeeded or not, so
        we do not exceed our API call allowance. Defaults to 2.
    maxSongs : int, optional
        Maximum number of URIs to keep per artist. Defaults to 3.

    Returns
    -------
    list
        Up to ``maxSongs`` track URIs in the order the API returned them
        (possibly empty), or ``[np.nan]`` when the request or the date
        parsing fails.

    Notes
    -----
    Only ``Exception`` subclasses are turned into ``[np.nan]``;
    ``KeyboardInterrupt`` and ``SystemExit`` propagate so that a long run
    can still be stopped by interrupting the kernel.
    """
    try:
        # Query the spotify API
        topSongs = spotify.artist_top_tracks(row['artistID'], "US")
        # Keep only songs released after MINDATE. Only the year (first four
        # characters of the release date) is used, so a release year is
        # compared as January 1st of that year.
        recentURIs = [
            x.uri
            for x in topSongs
            if datetime.strptime(x.album.release_date[:4], '%Y') > MINDATE
        ]
        # Limit the number of songs per artist
        return recentURIs[:maxSongs]
    except Exception:
        return [np.nan]
    finally:
        # Pause only; returning from `finally` would swallow every exception
        time.sleep(delay)

In [141]:
# Drop artists who could not be found in the search
# (dropna() removes every row that has a missing value in any column)
artists = artists.dropna()
# Find a list of song URIs for each artist. assign() builds a new frame instead
# of writing a column into the dropna() result, which avoids a
# SettingWithCopyWarning when rows were dropped.
artists = artists.assign(songURI=artists.apply(fetchTopSongs, axis=1))
# Transform lists of song URIs to multiple rows (one row per song)
artists = artists.explode('songURI', ignore_index=True)
artists

Unnamed: 0,artistName,artistID,songURI
0,21 Savage,1URnnhqYAYcrqrcwql10ft,spotify:track:2dHHgzDwk4BJdRwy9uXhTO
1,21 Savage,1URnnhqYAYcrqrcwql10ft,spotify:track:1bDbXMyjaUIooNwFE9wn0N
2,21 Savage,1URnnhqYAYcrqrcwql10ft,spotify:track:3F5CgOj3wFlRv51JsHbxhe
3,3 Doors Down,2RTUTCvo6onsAnheUk3aL9,spotify:track:6ZOBP3NvffbU4SZcrnt1k6
4,3 Doors Down,2RTUTCvo6onsAnheUk3aL9,spotify:track:3NLrRZoMF0Lx6zTlYqeIo4
5,3 Doors Down,2RTUTCvo6onsAnheUk3aL9,spotify:track:3WbphvawbMZ8FyqDxYGdSQ
6,311,41Q0HrwWBtuUkJc7C1Rp6K,spotify:track:6Fe3Flc9SjE03pqwD6PVQl
7,311,41Q0HrwWBtuUkJc7C1Rp6K,spotify:track:3mtukCAgd0mE260QcBfXAX
8,311,41Q0HrwWBtuUkJc7C1Rp6K,spotify:track:4cSSUpptr7r125fK5nRJhP
9,A Day To Remember,4NiJW4q9ichVqL1aUsgGAN,spotify:track:1KHKeIouP04dDtl0EetgED


## Load (write results to CSV)

In [142]:
# Write the artist/song table to disk, leaving the row index out of the file
artists.to_csv(path_or_buf='songs1.csv', index=False)