In [81]:
import os
import dotenv
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials #To access authorised Spotify data
from tqdm.notebook import tqdm
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import time
import random

In [82]:
# get client id and client secret
load_dotenv()
client_id= os.environ.get("SPOTIPY_CLIENT_ID")
client_secret = os.environ.get("SPOTIPY_SECRET")

client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager) #spotify object to access API

In [83]:
def artist_all_tracks(artist):
    
    '''
    Takes a list of artist names, iterates through their Spotify albums (including
    singles, compilations and collaborations), checks for duplicate albums, then
    appends all the tracks in those albums to a list of lists
    '''
    
    # Each list in this list will be a track and its features
    tracks = []
        
    # Get the artist URI (a unique ID)
    artist_uri = sp.search(artist)['tracks']['items'][0]['artists'][0]['uri']

    # Spotify has a lot of duplicate albums, but we'll cross-reference them with this list to avoid extra loops
    album_checker = []
    
    # The starting point of our loop of albums for those artists with more than 50
    n = 0
    
    # Note that here we include singles, compilations and collaborations in the albums to loop through
    while len(sp.artist_albums(artist_uri, album_type = ['album', 'single'], limit=50, offset = n)['items']) > 0:
        
        # Avoid overloading Spotify with requests by assigning the list of album dictionaries to a variable
        dict_list = sp.artist_albums(artist_uri, album_type = ['album', 'single'], limit=50, offset = n)['items']

        for i, album in tqdm(enumerate(dict_list)):

            # Add the featured artists for the album in question to the checklist
            check_this_album = [j['name'] for j in dict_list[i]['artists']]
            # And the album name
            check_this_album.append(dict_list[i]['name'])
            # And its date
            check_this_album.append(dict_list[i]['release_date'])

            # Only continue looping if that album isn't in the checklist
            if check_this_album not in album_checker:
    
                # Add this album to the checker
                album_checker.append(check_this_album)

                # for every song in an album, return data about the album, song title, and song uri
                for song in sp.album_tracks(album['uri'])['items']:
                    tracks.extend([[artist, album['name'], album['uri'], album['release_date'], song['name'], song['uri']]])

                # For every song on the album, get its descriptors and features in a list and add to the tracklist
                # tracks.extend([[artist, album['name'], album['uri'], ['name'], album['release_date']] 
                #   + list(sp.audio_features(sp.album_tracks(album['uri'])['items'].values())) ])
        
        # Go through the next 50 albums (otherwise we'll get an infinite while loop)
        n += 50

    return tracks

In [84]:
def df_tracks(tracklist):
    
    '''
    Takes the output of artist_tracks (i.e. a list of lists),
    puts it in a dataframe and formats it.
    '''

    df = pd.DataFrame(tracklist, columns=['artist',
     'album_name',
     'album_uri',
     'release_date',
     'track',
     'song_uri',
     ] 
     
    #  + list(sp.audio_features('7tr2za8SQg2CI8EDgrdtNl')[0].keys())
     
     )

    df.rename(columns={'uri':'song_uri'}, inplace=True)

    df.drop_duplicates(subset=['artist', 'track', 'release_date'], inplace=True)
    
    return df

In [85]:
def get_audio_features(df):
  # prevent error 429 by waiting 30 seconds before sending request
  time.sleep(30)    
  # Send 50 tracks per request
  batchsize = 50

  # feature lists
  acousticness = []
  instrumentalness = []
  mode = []
  time_signature = []
  energy =[]
  loudness =[]
  speechiness = []
  valence = []
  liveness =[]
  tempo = []
  danceability =[]
  key = []
  duration = []
  popularity = []

  # iterate through each track in batches of 50 songs
  for i in range(0,df.shape[0], batchsize):
    uris = []
    batch = df[i:i+batchsize].reset_index()
    # add each song uri to the uris array
    for track in range(0,batch.shape[0], 1):
      # list of up to 100 uris
      uris.append(batch.iloc[track]['song_uri'])

    # gets audio features of the 100 uris
    features = sp.audio_features(uris)
    tracks= sp.tracks(uris)
    
    for i in range(len(features)):
      if features[i] != None:
        popularity.append(tracks['tracks'][i]['popularity'])
        energy.append(features[i]['energy'])
        acousticness.append(features[i]['acousticness'])
        instrumentalness.append(features[i]['instrumentalness'])
        mode.append(features[i]['mode'])
        time_signature.append(features[i]['time_signature'])
        loudness.append(features[i]['loudness'])
        speechiness.append(features[i]['speechiness'])
        valence.append(features[i]['valence'])
        liveness.append(features[i]['liveness'])
        tempo.append(features[i]['tempo'])
        danceability.append(features[i]['danceability'])
        key.append(features[i]['key'])
        duration.append(features[i]['duration_ms'])
      else:
        popularity.append(0)
        energy.append(0)
        acousticness.append(0)
        instrumentalness.append(0)
        mode.append(0)
        time_signature.append(0)
        loudness.append(0)
        speechiness.append(0)
        valence.append(0)
        liveness.append(0)
        tempo.append(0)
        danceability.append(0)
        key.append(0)
        duration.append(0)

  # Add Columns to Dataframe
  df['popularity'] = popularity
  df['energy'] = energy
  df['acousticness'] = acousticness
  df['instrumentalness'] = instrumentalness
  df['mode'] = mode
  df['time_signature'] = time_signature
  df['loudness'] = loudness
  df['speechiness'] = speechiness
  df['valence'] = valence
  df['liveness'] = liveness
  df['tempo'] = tempo
  df['danceability'] = danceability
  df['key'] = key
  df['duration'] = duration

  # rearrange columns
  cols = ['track', 'album_name', 'release_date', 'album_uri', 'song_uri', 'popularity',
              'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness',
              'loudness', 'speechiness', 'tempo', 'time_signature', 'mode', 'key', 'valence', 'duration', ]
  df= df[cols]
        
  return df


In [86]:
def get_artist_tracklist(artist):
    tracks = artist_all_tracks(artist)
    tracklist = df_tracks(tracks)
    features = get_audio_features(tracklist)
    return features

In [87]:
artist = 'Kendrick Lamar'
df = get_artist_tracklist(artist)

0it [00:00, ?it/s]

In [88]:
df.sort_values(by=['popularity'], ascending=False)

Unnamed: 0,track,album_name,release_date,album_uri,song_uri,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,mode,key,valence,duration
2,Pink + White,Blonde,2016-08-20,spotify:album:3mH6qwIy9crq0I9YQbOuDf,spotify:track:3xKsf9qdS1CyvXSMEid6g8,90,0.667,0.545,0.545,5.5e-05,0.417,-7.362,0.107,159.94,3,1,9,0.549,184516
18,Thinkin Bout You,channel ORANGE,2012-07-10,spotify:album:392p3shh2jkxUxY2VHvlH8,spotify:track:7DfFc7a6Rwfi3YQMRbDMau,86,0.409,0.721,0.339,0.00153,0.0973,-11.195,0.0532,129.83,4,1,0,0.2,200747
27,Lost,channel ORANGE,2012-07-10,spotify:album:392p3shh2jkxUxY2VHvlH8,spotify:track:3GZD6HmiNUhxXYf8Gch723,85,0.0272,0.913,0.603,0.000503,0.167,-4.892,0.226,123.061,4,1,8,0.497,234093
1,Ivy,Blonde,2016-08-20,spotify:album:3mH6qwIy9crq0I9YQbOuDf,spotify:track:2ZWlPOoWh0626oTaHrnl2a,83,0.785,0.575,0.386,0.000289,0.229,-9.584,0.037,116.408,4,0,9,0.466,249191
13,White Ferrari,Blonde,2016-08-20,spotify:album:3mH6qwIy9crq0I9YQbOuDf,spotify:track:2LMkwUfqC6S6s6qDVlEuzV,82,0.77,0.501,0.0958,0.0,0.298,-15.605,0.0433,108.741,4,1,0,0.204,248808
8,Nights,Blonde,2016-08-20,spotify:album:3mH6qwIy9crq0I9YQbOuDf,spotify:track:7eqoqGkKwgOaWNNHx90uEZ,80,0.427,0.457,0.551,1e-06,0.113,-9.36,0.167,89.87,4,0,5,0.428,307151
26,Pyramids,channel ORANGE,2012-07-10,spotify:album:392p3shh2jkxUxY2VHvlH8,spotify:track:4QhWbupniDd44EDtnh2bFJ,79,0.0118,0.4,0.65,7.6e-05,0.791,-6.104,0.0466,107.989,4,0,5,0.15,592920
23,Super Rich Kids,channel ORANGE,2012-07-10,spotify:album:392p3shh2jkxUxY2VHvlH8,spotify:track:0725YWm6Z0TpZ6wrNk64Eb,78,0.219,0.739,0.419,0.0,0.196,-8.241,0.0854,60.493,4,0,3,0.58,304760
31,Pink Matter,channel ORANGE,2012-07-10,spotify:album:392p3shh2jkxUxY2VHvlH8,spotify:track:1fOkmYW3ZFkkjIdOZSf596,78,0.951,0.392,0.233,0.000322,0.106,-11.012,0.0587,176.569,3,0,11,0.494,268813
6,Self Control,Blonde,2016-08-20,spotify:album:3mH6qwIy9crq0I9YQbOuDf,spotify:track:5GUYJTQap5F3RDQiCOJhrS,77,0.768,0.581,0.21,0.0,0.369,-10.426,0.0321,80.099,4,1,8,0.452,249668


In [89]:
artist = artist.replace(' ', '_').lower()+'.csv'
df.to_csv('tracks/'+ artist)