In [1]:
import requests
import pandas as pd
import base64

Define utility functions to fetch data from the Spotify Web API

In [2]:
def get_token(client_id, client_secret):
    '''
    Request an access token from the Spotify accounts service using the
    client-credentials grant.

    client_id and client_secret are the credentials of the Spotify app.
    Returns the access token on an HTTP 200 response; otherwise prints the
    error payload and returns None.
    '''
    raw_credentials = (client_id + ':' + client_secret).encode('utf-8')
    basic_auth = base64.b64encode(raw_credentials).decode('utf-8')
    response = requests.post(
        'https://accounts.spotify.com/api/token',
        headers={'Authorization': 'Basic ' + basic_auth},
        data={'grant_type': 'client_credentials'},
    )
    payload = response.json()

    if response.status_code != 200:
        print('Error during the request.', payload)
        return None

    print('Token valid for {} seconds.'.format(payload['expires_in']))
    return payload['access_token']

def search_artist_id(string, token):
    '''
    Search Spotify for an artist and return the most popular match.

    Parameters
    ----------
    string : str
        Artist name to search for.
    token : str
        Valid bearer token for the Spotify Web API.

    Returns
    -------
    tuple
        (string, artist_id), where artist_id is the ID of the most popular
        artist in the search results. artist_id is None when the search has
        0 results or when the request fails; a message is printed in both cases.
    '''
    ep_search = 'https://api.spotify.com/v1/search'
    header = {'Authorization': f'Bearer {token}'}
    search_params = {'q': string, 'type': 'artist'}
    r = requests.get(ep_search, headers=header, params=search_params)

    if r.status_code != 200:
        print('Error during the request.')
        print(r.status_code)
        print(r.text)
        # Keep the return shape uniform so callers can always unpack the tuple.
        return (string, None)

    candidates = r.json()['artists']['items']
    if not candidates:
        print('Artist not found:', string)
        return (string, None)

    # Pick by popularity value. The previous sort_values(...).loc[0, ...] selected
    # the row *labelled* 0, i.e. the first search hit regardless of popularity.
    best = max(candidates, key=lambda artist: artist['popularity'])
    if best['name'].lower() != string.lower():
        print(f'WARNING: artist found differs from search string. Searched for {string}, found {best["name"]}')
    return (string, best['id'])
        
def get_artist_info(artist_id, token):
    '''
    Fetch an artist from the Spotify Web API and return a dict with the
    artist's follower count ('followers'), genre list ('genres') and
    popularity ('popularity').

    Prints the status code and response body and returns None when the
    request does not succeed.
    '''
    response = requests.get(
        f'https://api.spotify.com/v1/artists/{artist_id}',
        headers={'Authorization': f'Bearer {token}'},
    )
    if response.status_code != 200:
        print('Error during the request.')
        print(response.status_code)
        print(response.text)
        return None

    artist = response.json()
    info = {}
    info['followers'] = artist['followers']['total']
    info['genres'] = artist['genres']
    info['popularity'] = artist['popularity']
    return info

def get_artist_albums(artist_id, token):
    '''
    Fetch every album, single and "appears on" release of an artist.

    Parameters
    ----------
    artist_id : str
        Spotify ID of the artist.
    token : str
        Valid bearer token for the Spotify Web API.

    Returns
    -------
    dict or None
        Dictionary mapping release date -> album name. Because the release date
        is the key, releases sharing a date overwrite each other (last one wins).
        Returns None if the first request fails. If a later page fails, the error
        is printed and the albums collected so far are returned.
    '''
    header = {'Authorization': f'Bearer {token}'}
    r = requests.get(f'https://api.spotify.com/v1/artists/{artist_id}/albums?limit=50&include_groups=album,single,appears_on', headers=header)
    if r.status_code != 200:
        print('Error during the request.')
        print(r.status_code)
        print(r.text)
        return None

    albums = dict()
    while True:
        page = r.json()
        for item in page['items']:
            albums[item['release_date']] = item['name']

        next_ = page['next']
        if next_ is None:
            break

        r = requests.get(next_, headers=header)
        if r.status_code != 200:
            # Stop paginating: retrying the same URL unconditionally would loop forever.
            print('Error during the request.')
            print(r.status_code)
            print(r.text)
            break

    return albums
        
def get_related_artists(artist_id, token):
    '''
    Fetch the artists related to artist_id and return a dict with their names
    ('related_artists') and the concatenation of all their genres
    ('related_genres'); genres may therefore be repeated.

    Prints the status code and response body and returns None when the
    request does not succeed.
    '''
    response = requests.get(
        f'https://api.spotify.com/v1/artists/{artist_id}/related-artists',
        headers={'Authorization': f'Bearer {token}'},
    )
    if response.status_code != 200:
        print('Error during the request.')
        print(response.status_code)
        print(response.text)
        return None

    related = response.json()['artists']
    names = [entry['name'] for entry in related]
    # Flatten the per-artist genre lists, keeping duplicates and order.
    flat_genres = [genre for entry in related for genre in entry['genres']]
    return {'related_artists': names,
            'related_genres': flat_genres}

def get_artist_top_tracks(artist_id, token, country_code='AR'):
    '''
    Fetch the IDs of an artist's top tracks in a given market.

    Parameters
    ----------
    artist_id : str
        Spotify ID of the artist.
    token : str
        Valid bearer token for the Spotify Web API.
    country_code : str, optional
        Country code sent as the `country` query parameter (default 'AR').

    Returns
    -------
    list or None
        List of track IDs, or None when the request fails. On failure the
        status code and response body are printed.
    '''
    header = {'Authorization': f'Bearer {token}'}
    # Let requests build (and URL-encode) the query string.
    r = requests.get(f'https://api.spotify.com/v1/artists/{artist_id}/top-tracks',
                     headers=header, params={'country': country_code})
    if r.status_code == 200:
        return [track['id'] for track in r.json()['tracks']]
    else:
        print('Error during the request.')
        print(r.status_code)
        print(r.text)

def get_track_audio_features(track_id, token):
    '''
    Fetch the audio features of a track.

    Parameters
    ----------
    track_id : str
        Spotify ID of the track.
    token : str
        Valid bearer token for the Spotify Web API.

    Returns
    -------
    dict or None
        The decoded JSON response on success, or None when the request fails.
        On failure the status code and response body are printed, matching the
        other helpers, instead of failing silently.
    '''
    header = {'Authorization': f'Bearer {token}'}
    r = requests.get(f'https://api.spotify.com/v1/audio-features/{track_id}', headers=header)
    if r.status_code == 200:
        return r.json()
    else:
        print('Error during the request.')
        print(r.status_code)
        print(r.text)

Read the credentials from 'credentials.txt'

In [3]:
client_id = None
client_secret = None

# Scan credentials.txt line by line; a credential's value is the text between the
# first pair of single quotes on a line mentioning 'client_id' or 'client_secret'.
with open('credentials.txt', 'r') as file:
    for line in file:
        start = line.find('\'') + 1
        end = line.find('\'', start)
        if start == 0 or end == -1:
            # No complete quoted value on this line, so there is nothing to extract.
            continue
        value = line[start:end]

        if 'client_id' in line:
            client_id = value
            print('Found Client ID')
        elif 'client_secret' in line:
            client_secret = value
            print('Found Client secret')

        # Stop reading as soon as both credentials are known.
        if client_id is not None and client_secret is not None:
            break

# The with-block has already closed the file; no explicit close() is needed.
if client_id is None:
    print('Client ID not found in credentials.txt')
if client_secret is None:
    print('Client secret not found in credentials.txt')

Found Client ID
Found Client secret


Get a valid token based on the credentials read

In [4]:
# Request an access token with the credentials read from credentials.txt.
# get_token returns None (after printing the error) when the request fails.
token = get_token(client_id, client_secret)

Token valid for 3600 seconds.


Lollapalooza lineup from https://www.lollapaloozaar.com/news/lollapalooza-argentina-confirma-su-lineup-por-dia/

In [5]:
# Raw lineup text, one string per festival day. Artist names are separated by an
# en dash ('–'), which is the separator str_to_list splits on.
lineup_day_1 = 'Travis Scott – Martin Garrix – Los Fabulosos Cadillacs – Brockhampton – DUKI – Madeon – Rita Ora – A Day to Remember – King Princess – LP – WOS – Denzel Curry – Yungblud – Nathy Peluso – J mena – AJR – Louta – Two Feet – Bizarrap – Fuego – La Delio Valdez – Dani – Kaydy Cain – Maye – Boombox Cartel – Cimafunk – Ms Nina – Feli Colina – Axel Fiks – Lucia Tacchetti – DJ Sky – Alejo y Valentin'
lineup_day_2 = 'The Strokes – Gwen Stefani – Armin Van Buuren – Vampire Weekend – Ratones Paranoicos – ILLENIUM – Kacey Musgraves – Litto Nebbia – Charli XCX – Jaden Smith – R3HAB – Hayley Kiyoko – El Mató a un Policía Motorizado – Rels B – Emmanuel Horvilleur – Kali Uchis – Paloma Mami – Fabiana Cantilo – Trueno – Amaia – Wallows – Yung Beef – Zoe Gotusso – Girl Ultra – Las Ligas Menores – Ainda – Ghetto Kids – D3FAI – Paco Leiva – LIMON – Louly'
lineup_day_3 = 'Guns N’ Roses – Lana del Rey – Cage the Elephant – James Blake – Alan Walker – Rezz – The Lumineers – Rex Orange County – MIKA – Perry Farrell’s Kind Heaven Orchestra – Airbag – Lauv – YSY A – Chris Lake – Pabllo Vittar – San Holo – Emilia – Idles – Masego – The Hu – Elsa y Elmar – Natalie Perez – Goldfish – Florian – Miranda Johansen – DABOW – El Buen Salvaje – Metro Live – Reydel'

Convert the string into a list of artists

In [6]:
def str_to_list(x):
    '''
    Split a lineup string into a list of artist names.

    Names are separated by an en dash ('–'). Surrounding whitespace is stripped
    and empty names (e.g. from a trailing separator) are dropped.
    '''
    stripped_names = (band.strip() for band in x.split('–'))
    return [band for band in stripped_names if band]

In [7]:
# The lineup names are rebound from str to list, so convert only while they are
# still strings; this keeps the cell safe to re-run.
if isinstance(lineup_day_1, str):
    lineup_day_1 = str_to_list(lineup_day_1)
if isinstance(lineup_day_2, str):
    lineup_day_2 = str_to_list(lineup_day_2)
if isinstance(lineup_day_3, str):
    lineup_day_3 = str_to_list(lineup_day_3)

Find artists ids on Spotify

In [8]:
# Look up every artist of the three days, then index the (artist, artist_id) pairs by name.
full_lineup = lineup_day_1 + lineup_day_2 + lineup_day_3
search_results = [search_artist_id(artist_name, token) for artist_name in full_lineup]
artists_ids = pd.DataFrame(search_results, columns=['artist', 'artist_id'])
artists_ids = artists_ids.set_index('artist')

Artist not found: Perry Farrell’s Kind Heaven Orchestra


Manually correct artist names and IDs

In [9]:
artists_ids.head()

Unnamed: 0_level_0,artist_id
artist,Unnamed: 1_level_1
Travis Scott,0Y5tJX1MQlPlqiwlOH1tJY
Martin Garrix,60d24wfXkVzDSfLS6hyCjZ
Los Fabulosos Cadillacs,2FS22haX3FYbyOsUAkuYqZ
Brockhampton,1Bl6wpkWCQ4KVgnASpvzzA
DUKI,1bAftSH8umNcGZ0uyV7LMg


In [10]:
# Hand-picked Spotify IDs that replace the ones returned by the search.
manual_artist_ids = {
    'Dani': '6cC67GpmPCjQjOYLpmOGhN',
    'Maye': '5ti5FPHgtaSf15KcUisZMt',
    'DJ Sky': '2I0SNwGhPsYEhKzxUcjab3',
    'Amaia': '1WLEfsQjvgtFSGkrHonzFX',
    'LIMON': '3bOCNWtgHz0rLE1j5bfR72',
    'Florian': '6C3bLjpIfVoapHjMfpYAy2',
    'Fuego': '7wU2WGCJ8HxkekHHE2QLul',
}
for corrected_name, corrected_id in manual_artist_ids.items():
    artists_ids.loc[corrected_name, 'artist_id'] = corrected_id

Manually search for a missing artist's id

In [11]:
search_artist_id('perry farrell', token)

('perry farrell', '2DS5RPK5A2GQTucYlgsQdE')

In [12]:
# Searching for the full act name printed "Artist not found", so the ID returned
# for the shorter query 'perry farrell' is stored under the lineup name instead.
artists_ids.loc['Perry Farrell’s Kind Heaven Orchestra', 'artist_id'] = search_artist_id('perry farrell', token)[1]

Remove "Metro Live" since it's not an artist.

In [13]:
# errors='ignore' keeps this cell re-runnable: once the row is gone, a plain
# drop would raise KeyError on the second execution.
artists_ids = artists_ids.drop('Metro Live', errors='ignore')

## Building the artists DataFrame

In [14]:
artists_ids.head()

Unnamed: 0_level_0,artist_id
artist,Unnamed: 1_level_1
Travis Scott,0Y5tJX1MQlPlqiwlOH1tJY
Martin Garrix,60d24wfXkVzDSfLS6hyCjZ
Los Fabulosos Cadillacs,2FS22haX3FYbyOsUAkuYqZ
Brockhampton,1Bl6wpkWCQ4KVgnASpvzzA
DUKI,1bAftSH8umNcGZ0uyV7LMg


In [15]:
# Start the artists table from a copy, so columns added to df_artists below do
# not modify artists_ids.
df_artists = artists_ids.copy()

Get info for each artist and add it to the df

In [16]:
# Calls get_artist_info once per row (one HTTP request per artist).
# result_type='expand' turns each returned dict into columns, which are then
# assigned to 'followers', 'genres' and 'popularity'.
df_artists[['followers', 'genres', 'popularity']] = df_artists.apply(lambda x: get_artist_info(x['artist_id'], token=token),
                                                                     axis=1, result_type='expand')

In [17]:
# One get_artist_albums call per artist ID; each cell holds that artist's result.
df_artists['albums'] = df_artists['artist_id'].apply(
    lambda artist_id: get_artist_albums(artist_id, token=token)
)

In [18]:
# One get_artist_top_tracks call per artist ID (default country code);
# each cell holds that artist's result.
df_artists['top_tracks'] = df_artists['artist_id'].apply(
    lambda artist_id: get_artist_top_tracks(artist_id, token=token)
)

In [19]:
# Calls get_related_artists once per row (one HTTP request per artist).
# result_type='expand' turns each returned dict into columns, which are then
# assigned to 'related_artists' and 'related_genres'.
df_artists[['related_artists', 'related_genres']] = df_artists.apply(lambda x: get_related_artists(x['artist_id'], token=token),
                                                                     axis=1, result_type='expand')

Indicate on which day each artist is performing

In [20]:
# Default every artist to day 3, then overwrite those listed on days 1 and 2.
# Membership tests replace the label slices, so the result does not depend on
# row order, and no chained inplace fillna on a column is needed.
df_artists['day'] = 3
df_artists.loc[df_artists.index.isin(lineup_day_1), 'day'] = 1
df_artists.loc[df_artists.index.isin(lineup_day_2), 'day'] = 2

In [21]:
df_artists.head()

Unnamed: 0_level_0,artist_id,followers,genres,popularity,albums,top_tracks,related_artists,related_genres,day
artist,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Travis Scott,0Y5tJX1MQlPlqiwlOH1tJY,7705985,[rap],98,"{'2019-12-27': 'JACKBOYS', '2018-11-06': 'NOT ...","[3eekarcy7kvN4yt5ZFzltW, 7h0d2h0fUmzbs7zeFigJP...","[A$AP Rocky, Chief Keef, KIDS SEE GHOSTS, Push...","[east coast hip hop, hip hop, pop rap, rap, tr...",1
Martin Garrix,60d24wfXkVzDSfLS6hyCjZ,12574151,"[big room, edm, pop, progressive house, tropic...",88,"{'2019-07-10': 'The Martin Garrix Experience',...","[7Feaw9WAEREY0DUOSXJLOM, 7pWK1kMgHy5lNNiIfuRbk...","[AREA21, Brooks, Deorro, Lucas & Steve, KAAZE,...","[big room, edm, big room, deep big room, edm, ...",1
Los Fabulosos Cadillacs,2FS22haX3FYbyOsUAkuYqZ,1672897,"[argentine rock, latin alternative, latin rock...",73,{'2017-10-20': 'En Vivo en The Theater at Madi...,"[198qSChSMQFuSimdeeY9gK, 7d4pdMym8ZBOgf1oVPTiP...","[Los Pericos, Vicentico, Aterciopelados, Los A...","[argentine reggae, argentine rock, latin alter...",1
Brockhampton,1Bl6wpkWCQ4KVgnASpvzzA,1186410,"[boy band, hip hop, pop, rap]",85,"{'2019-08-23': 'GINGER', '2018-09-21': 'irides...","[6U0FIYXCQ3TGrk4tFpLrEA, 0dWOFwdXrbBUYqD9DLsoy...","[Kevin Abstract, Matt Champion, JPEGMAFIA, Ame...","[alternative r&b, hip hop, lgbtq+ hip hop, rap...",1
DUKI,1bAftSH8umNcGZ0uyV7LMg,2715207,"[argentine hip hop, trap argentino]",84,"{'2019-11-01': 'Súper Sangre Joven', '2020-01-...","[57kdZIOAaolxAjB67d2yU3, 2ECIwi1a7mfokdDkkJ08N...","[Uriel Natero, Bhavi, KHEA, Lit Killah, Seven ...","[argentine hip hop, trap argentino, argentine ...",1


In [22]:
import itertools

# Build one (artist, track_id) pair per top track.
tuples = []
for artist, tracks in df_artists['top_tracks'].items():
    if tracks is None:
        # get_artist_top_tracks returns None when its request failed; skip the artist.
        continue
    tuples.extend(itertools.product([artist], tracks))

# Building the frame directly also works when there are no pairs at all.
df_tracks = pd.DataFrame(tuples, columns=['artist', 'track'])
df_tracks.head(20)

Unnamed: 0,artist,track
0,Travis Scott,3eekarcy7kvN4yt5ZFzltW
1,Travis Scott,7h0d2h0fUmzbs7zeFigJPn
2,Travis Scott,2xLMifQCjDGFmkHkpNLD9h
3,Travis Scott,6gBFPUFcJLzWGx4lenP6h2
4,Travis Scott,4ea9w8c4ROqiZpJVhfBA3m
5,Travis Scott,4AO1XhrgJczQ9bNVxdfKQe
6,Travis Scott,2cYqizR4lgvp4Qu6IQ3qGN
7,Travis Scott,3s4mrPrEFFPF0LmAfutW0n
8,Travis Scott,40mjsnRjCpycdUw3xhS20g
9,Travis Scott,7lAK3oHbfEnvUQWosrMMpR


In [23]:
AUDIO_FEATURE_COLUMNS = ['acousticness', 'analysis_url', 'danceability', 'duration_ms', 'energy', 'id',
                         'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'speechiness', 'tempo',
                         'time_signature', 'track_href', 'type', 'uri', 'valence']

# One request per track. A failed request (None) becomes an empty record, which
# yields a row of missing values instead of breaking the assignment.
audio_features = pd.DataFrame(
    [get_track_audio_features(track_id, token=token) or {} for track_id in df_tracks['track']],
    index=df_tracks.index,
)

# Select the features by name before assigning: the multi-column assignment is
# positional, so relying on the key order of the API response could mislabel values.
df_tracks[AUDIO_FEATURE_COLUMNS] = audio_features.reindex(columns=AUDIO_FEATURE_COLUMNS)

In [24]:
df_tracks.head()

Unnamed: 0,artist,track,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence
0,Travis Scott,3eekarcy7kvN4yt5ZFzltW,0.0546,https://api.spotify.com/v1/audio-analysis/3eek...,0.598,175721,0.427,3eekarcy7kvN4yt5ZFzltW,6e-06,7,0.21,-8.764,0,0.0317,76.469,4,https://api.spotify.com/v1/tracks/3eekarcy7kvN...,audio_features,spotify:track:3eekarcy7kvN4yt5ZFzltW,0.0605
1,Travis Scott,7h0d2h0fUmzbs7zeFigJPn,0.0567,https://api.spotify.com/v1/audio-analysis/7h0d...,0.559,244874,0.491,7h0d2h0fUmzbs7zeFigJPn,0.0,2,0.0992,-6.406,0,0.0346,153.131,4,https://api.spotify.com/v1/tracks/7h0d2h0fUmzb...,audio_features,spotify:track:7h0d2h0fUmzbs7zeFigJPn,0.114
2,Travis Scott,2xLMifQCjDGFmkHkpNLD9h,0.00513,https://api.spotify.com/v1/audio-analysis/2xLM...,0.834,312820,0.73,2xLMifQCjDGFmkHkpNLD9h,0.0,8,0.124,-3.714,1,0.222,155.008,4,https://api.spotify.com/v1/tracks/2xLMifQCjDGF...,audio_features,spotify:track:2xLMifQCjDGFmkHkpNLD9h,0.446
3,Travis Scott,6gBFPUFcJLzWGx4lenP6h2,0.0847,https://api.spotify.com/v1/audio-analysis/6gBF...,0.841,243837,0.728,6gBFPUFcJLzWGx4lenP6h2,0.0,7,0.149,-3.37,1,0.0484,130.049,4,https://api.spotify.com/v1/tracks/6gBFPUFcJLzW...,audio_features,spotify:track:6gBFPUFcJLzWGx4lenP6h2,0.43
4,Travis Scott,4ea9w8c4ROqiZpJVhfBA3m,0.188,https://api.spotify.com/v1/audio-analysis/4ea9...,0.845,279907,0.631,4ea9w8c4ROqiZpJVhfBA3m,0.0,7,0.0806,-8.206,0,0.216,111.983,4,https://api.spotify.com/v1/tracks/4ea9w8c4ROqi...,audio_features,spotify:track:4ea9w8c4ROqiZpJVhfBA3m,0.545


Export to .csv

In [25]:
# Write the artists table to artists.csv; the index column gets the header 'artist'.
df_artists.to_csv('artists.csv', index_label='artist')

In [26]:
# Write the tracks table to tracks.csv.
# NOTE(review): df_tracks already has a 'track' column, so index_label='track'
# gives the row-number index column the same header. Confirm this is intended;
# index=False would avoid the duplicate header but changes the file layout.
df_tracks.to_csv('tracks.csv', index_label='track')

---