## Config

In [175]:
# Import dependencies
import tekore as tk
import os
import time
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta

# Spotify API credentials are taken from the environment; each is None when its variable is unset
CLIENTID = os.getenv('CLIENTID')
CLIENTSECRET = os.getenv('CLIENTSECRET')
# Release-date cutoff: five years before the moment this cell is executed
MINDATE = datetime.today() - relativedelta(years=5)


In [176]:
### Authenticate Tekore
# Request an application token using the client ID and secret read from the environment above
app_token = tk.request_client_token(CLIENTID, CLIENTSECRET)
# Build the Spotify client from that token; the lookup functions below issue their API calls through it
spotify = tk.Spotify(app_token)

## Read in list of artists

In [163]:
# Load the artist list from artists.csv; the Spotify lookup below reads its 'artistName' column
artists = pd.read_csv('artists.csv')
# Show the loaded frame as the cell output
artists

Unnamed: 0,artistName
0,21 Savage
1,3 Doors Down
2,311
3,A Day To Remember
4,A Tribe Called Quest
...,...
1345,Knocked Loose
1346,Lilith Czar
1347,Puscifer
1348,The Bronx


## Get artist IDs

In [164]:
# Create function to search spotify based on artist name
def searchSpotify(artists, delay=2):
    """Look up one artist on Spotify and attach the first search hit to the row.

    Parameters
    ----------
    artists : pandas.Series or dict-like
        A single row holding an 'artistName' entry. The parameter name is
        kept so ``DataFrame.apply(searchSpotify, axis=1)`` keeps working.
    delay : float, optional
        Seconds to pause after the request so the API call allowance is not
        exceeded. Defaults to 2; pass 0 to skip the pause.

    Returns
    -------
    A copy of the row with 'artistID', 'artistPop' and 'artistGenre' added.
    All three are NaN when the search raises or returns no artist.

    Notes
    -----
    Only ``Exception`` subclasses are treated as a failed lookup, so
    ``KeyboardInterrupt`` and ``SystemExit`` propagate and a long-running
    ``apply`` can be interrupted.
    """
    # Work on a copy so the caller's row is never modified
    row = artists.copy()
    try:
        # Search spotify by name; the result is unpacked from a one-element tuple
        possibleArtists, = spotify.search(row['artistName'], types=['artist'], limit=1)
        # Select the first search result (raises IndexError when there is none)
        match = possibleArtists.items[0]
        found = (match.id, match.popularity, ", ".join(match.genres))
    except Exception:
        # Best-effort lookup: any failure marks the artist as not found
        found = (np.nan, np.nan, np.nan)
    row['artistID'], row['artistPop'], row['artistGenre'] = found
    if delay:
        # Pause so we do not exceed our API call allowance
        time.sleep(delay)
    return row

In [165]:
# Run the Spotify lookup once per row, adding artistID, artistPop and artistGenre to each artist
artists = artists.apply(searchSpotify, axis='columns')
artists

Unnamed: 0,artistName,artistID,artistPop,artistGenre
0,21 Savage,1URnnhqYAYcrqrcwql10ft,93.0,"atl hip hop, rap"
1,3 Doors Down,2RTUTCvo6onsAnheUk3aL9,72.0,"alternative metal, nu metal, pop rock, post-gr..."
2,311,41Q0HrwWBtuUkJc7C1Rp6K,65.0,"alternative metal, alternative rock, funk meta..."
3,A Day To Remember,4NiJW4q9ichVqL1aUsgGAN,71.0,"metalcore, pop punk, screamo"
4,A Tribe Called Quest,09hVIj6vWgoCDtT03h8ZCa,69.0,"alternative hip hop, conscious hip hop, east c..."
...,...,...,...,...
1345,Knocked Loose,4qrHkx5cgWIslciLXUMrYw,59.0,"kentucky metal, kentucky punk"
1346,Lilith Czar,2tMZ37dVy1DzZjXXCjhgFq,40.0,
1347,Puscifer,2pAajGWerK3ghwToNWFENS,57.0,"alternative metal, alternative rock, industria..."
1348,The Bronx,7nqSDaZKccpnUqjzedyZF4,44.0,"melodic hardcore, skate punk"


In [167]:
# Order artists from most to least popular (renumbering the index),
# then discard the ones for which no Spotify artistID was found
artists = (
    artists
    .sort_values(by='artistPop', ascending=False, ignore_index=True)
    .dropna(subset=['artistID'])
)
artists

Unnamed: 0,artistName,artistID,artistPop,artistGenre
0,Taylor Swift,06HL4z0CvFAxyc27GXpf02,100.0,pop
1,Bad Bunny,4q3ewBCX7sLwd24euuV69X,99.0,"reggaeton, trap latino, urbano latino"
2,The Weeknd,1Xyo4u8uXC1ZmMpatF05PJ,98.0,"canadian contemporary r&b, canadian pop, pop"
3,Drake,3TVXtAsR1Inumwj472S9r4,97.0,"canadian hip hop, canadian pop, hip hop, rap, ..."
4,Morgan Wallen,4oUHIQIBe0LHzYfvXNW4QM,94.0,contemporary country
...,...,...,...,...
1343,Emo Nite,1VVzQaPmhgjVogHUYcqE0a,1.0,
1344,Ivan Neville's Dumpstaphunk,2ohI68v00kvLwU161NvqBU,0.0,
1345,Electric Forest,2tLUQEzPZNbv2FUzZevFVJ,0.0,
1346,Hot 100,1a2O6kOO33cqsmWKkzCrJd,0.0,


## Get top songs for each artist

In [147]:
# Define function that will get the top tracks for each artist in the list
def fetchTopSongs(row, market="US", max_songs=3, delay=2):
    """Return the URIs of an artist's recent top tracks.

    Parameters
    ----------
    row : pandas.Series or dict-like
        A single row holding an 'artistID' entry.
    market : str, optional
        Market passed to ``spotify.artist_top_tracks``. Defaults to "US".
    max_songs : int, optional
        Maximum number of URIs to return per artist. Defaults to 3.
    delay : float, optional
        Seconds to pause after the request so the API call allowance is not
        exceeded. Defaults to 2; pass 0 to skip the pause.

    Returns
    -------
    list
        Up to ``max_songs`` track URIs in the order the API returned them,
        an empty list when no track passes the date filter, or ``[nan]``
        when the request or the filtering raises.

    Notes
    -----
    Only ``Exception`` subclasses are treated as a failed lookup, so
    ``KeyboardInterrupt`` and ``SystemExit`` propagate and a long-running
    ``apply`` can be interrupted.
    """
    try:
        # Query the spotify API
        topSongs = spotify.artist_top_tracks(row['artistID'], market)
        # Keep only tracks whose album release year is after MINDATE; only the
        # first four characters (the year) of the release date are parsed.
        # NOTE(review): a year parses to 1 January of that year, so releases
        # from MINDATE's own calendar year fall before the cutoff - confirm
        # this is intended.
        recent = [
            x.uri for x in topSongs
            if datetime.strptime(x.album.release_date[:4], '%Y') > MINDATE
        ]
        # Limit the number of songs per artist
        result = recent[:max_songs]
    except Exception:
        # Best-effort lookup: any failure yields a single NaN placeholder
        result = [np.nan]
    if delay:
        # Pause so we do not exceed our API call allowance
        time.sleep(delay)
    return result

In [153]:
# Work on a copy so the artists frame itself is left untouched
songs1 = artists.copy()
# Find a list of song URIs for each artist
songs1['songURI'] = songs1.apply(fetchTopSongs, axis=1)
# Transform the lists of song URIs into one row per song
songs1 = songs1.explode('songURI', ignore_index=True)
# Drop only the rows where no song could be found. A plain dropna() would also
# discard songs of any artist that has a missing value in another column.
songs1 = songs1.dropna(subset=['songURI'])
songs1

Unnamed: 0,artistName,artistID,songURI
0,21 Savage,1URnnhqYAYcrqrcwql10ft,spotify:track:2dHHgzDwk4BJdRwy9uXhTO
1,21 Savage,1URnnhqYAYcrqrcwql10ft,spotify:track:1bDbXMyjaUIooNwFE9wn0N
2,21 Savage,1URnnhqYAYcrqrcwql10ft,spotify:track:3F5CgOj3wFlRv51JsHbxhe
3,21 Savage,1URnnhqYAYcrqrcwql10ft,spotify:track:2dHHgzDwk4BJdRwy9uXhTO
4,21 Savage,1URnnhqYAYcrqrcwql10ft,spotify:track:1bDbXMyjaUIooNwFE9wn0N
...,...,...,...
8584,Trivium,278ZYwGhdK6QTzE3MFePnP,spotify:track:2tNx0sauNCGfpniMNu9hEh
8585,Trivium,278ZYwGhdK6QTzE3MFePnP,spotify:track:1c7kZbuNixEsCMbU3MZFpy
8586,Trivium,278ZYwGhdK6QTzE3MFePnP,spotify:track:3fnqNxjN7o0tJe7zOQZV68
8587,Trivium,278ZYwGhdK6QTzE3MFePnP,spotify:track:2tNx0sauNCGfpniMNu9hEh


## Load

In [160]:
# Write the per-song table to songs1.csv, leaving out the DataFrame index
songs1.to_csv('songs1.csv', index=False)