In [None]:
import requests
import pandas as pd
import base64

Define utility functions to fetch data from the Spotify Web API

In [None]:
def get_token(client_id, client_secret):
    '''
    Request an access token from the Spotify accounts service using the
    client-credentials grant.

    Parameters
    ----------
    client_id, client_secret : str
        Credentials of the Spotify app. They are joined as "id:secret" and
        sent base64-encoded in a Basic Authorization header.

    Returns
    -------
    str or None
        The access token on success. If the service does not answer 200 the
        status code and raw body are printed and None is returned.
    '''
    encoded = base64.b64encode(bytes(client_id + ':' + client_secret, 'utf-8'))
    header = {'Authorization': 'Basic ' + str(encoded, 'utf-8')}
    params = {'grant_type': 'client_credentials'}
    r = requests.post('https://accounts.spotify.com/api/token', headers=header, data=params)

    if r.status_code != 200:
        # Print the raw text rather than r.json(): an error body is not
        # guaranteed to be JSON, and a decoding failure here would hide the
        # actual HTTP error. Same reporting format as the other helpers.
        print('Error during the request.')
        print(r.status_code)
        print(r.text)
        return None

    payload = r.json()  # decode once, reuse for both fields
    print('Token valid for {} seconds.'.format(payload['expires_in']))
    return payload['access_token']

def search_artist_id(string, token):
    '''
    Search Spotify for artists matching a string and return the most popular one.

    Parameters
    ----------
    string : str
        Free-text search query.
    token : str
        Access token, sent as a Bearer Authorization header.

    Returns
    -------
    tuple
        (name, id) of the search result with the highest 'popularity'.
        (string, None) when the search has no results or the request does
        not answer 200, so callers can always unpack a 2-tuple.
    '''
    ep_search = 'https://api.spotify.com/v1/search'
    header = {'Authorization': f'Bearer {token}'}
    search_params = {'q': string, 'type': 'artist'}
    r = requests.get(ep_search, headers=header, params=search_params)

    if r.status_code != 200:
        print('Error during the request.')
        print(r.status_code)
        print(r.text)
        return (string, None)

    items = r.json()['artists']['items']
    if not items:
        print('Artist not found:', string)
        return (string, None)

    # Pick by popularity value. The previous sort_values(...).loc[0] selected
    # the row *labelled* 0, i.e. the first search hit, regardless of the sort.
    best = max(items, key=lambda artist: artist['popularity'])
    return (best['name'], best['id'])
        
def get_artist_info(artist_id, token):
    '''
    Fetch one artist from the Spotify API and summarise it.

    On a 200 response returns a dict with the keys 'followers' (total
    follower count), 'genres' and 'popularity'. On any other status the
    status code and body are printed and None is returned.
    '''
    url = f'https://api.spotify.com/v1/artists/{artist_id}'
    response = requests.get(url, headers={'Authorization': f'Bearer {token}'})

    # Guard clause: report the failure and bail out.
    if response.status_code != 200:
        print('Error during the request.')
        print(response.status_code)
        print(response.text)
        return None

    artist = response.json()
    return {
        'followers': artist['followers']['total'],
        'genres': artist['genres'],
        'popularity': artist['popularity'],
    }

def get_artist_albums(artist_id, token):
    '''
    Collect an artist's releases (albums, singles and appearances) from Spotify.

    Follows the 'next' link of each page until it is None.

    Returns
    -------
    dict or None
        Mapping of release date -> release name. Because the date is the key,
        releases that share a release date overwrite one another (the last
        one seen wins).
        None if the very first request does not answer 200. If a later page
        fails, the error is printed and the releases gathered so far are
        returned.
    '''
    header = {'Authorization': f'Bearer {token}'}
    url = f'https://api.spotify.com/v1/artists/{artist_id}/albums?limit=50&include_groups=album,single,appears_on'

    albums = dict()
    first_page = True
    while url is not None:
        r = requests.get(url, headers=header)
        if r.status_code != 200:
            print('Error during the request.')
            print(r.status_code)
            print(r.text)
            # Stop here: re-requesting the same page unconditionally (as the
            # old loop did) never terminates if the page keeps failing.
            return None if first_page else albums

        page = r.json()
        for item in page['items']:
            albums[item['release_date']] = item['name']
        url = page['next']
        first_page = False

    return albums
        
def get_related_artists(artist_id, token):
    '''
    Query Spotify's related-artists endpoint for one artist.

    On a 200 response returns a dict with:
      'related_artists' - names of the related artists, in response order;
      'related_genres'  - every genre of every related artist, flattened
                          into one list (duplicates are kept).
    On any other status the status code and body are printed and None is
    returned.
    '''
    url = f'https://api.spotify.com/v1/artists/{artist_id}/related-artists'
    response = requests.get(url, headers={'Authorization': f'Bearer {token}'})

    if response.status_code != 200:
        print('Error during the request.')
        print(response.status_code)
        print(response.text)
        return None

    related = response.json()['artists']
    return {
        'related_artists': [artist['name'] for artist in related],
        'related_genres': [genre for artist in related for genre in artist['genres']],
    }

Read the credentials from 'credentials.txt'

In [None]:
# Credentials live in a local text file so they never appear in the notebook.
# A relevant line names the key ('client_id' / 'client_secret') and carries
# the value between single quotes.
client_id = None
client_secret = None

with open('credentials.txt', 'r') as file:
    for line in file:
        # Splitting on the quote gives [text before, value, text after, ...].
        # Fewer than three parts means there is no complete quoted value, so
        # the line is skipped instead of being sliced into a bogus credential.
        parts = line.split('\'')
        if len(parts) < 3:
            continue
        key, value = parts[0], parts[1]

        # Match the key against the text before the first quote only, so a
        # value that happens to contain a key name cannot be misread.
        if 'client_id' in key:
            client_id = value
            print('Found Client ID')
        elif 'client_secret' in key:
            client_secret = value
            print('Found Client secret')

        if client_id is not None and client_secret is not None:
            break
# The with-block closes the file; no explicit close() is needed.

if client_id is None:
    print('Client ID not found in credentials.txt')
if client_secret is None:
    print('Client secret not found in credentials.txt')

Get a valid token based on the credentials read

In [None]:
# get_token() prints the error and returns None when the token request does
# not answer 200, so `token` may be None after this cell.
token = get_token(client_id, client_secret)

Lollapalooza lineup from https://www.lollapaloozaar.com/news/lollapalooza-argentina-confirma-su-lineup-por-dia/

In [None]:
# Raw lineup, one string per festival day. Artist names are separated by an
# en dash ('–'), which is the separator str_to_list splits on.
lineup_day_1 = 'Travis Scott – Martin Garrix – Los Fabulosos Cadillacs – Brockhampton – DUKI – Madeon – Rita Ora – A Day to Remember – King Princess – LP – WOS – Denzel Curry – Yungblud – Nathy Peluso – J mena – AJR – Louta – Two Feet – Bizarrap – Fuego – La Delio Valdez – Dani – Kaydy Cain – Maye – Boombox Cartel – Cimafunk – Ms Nina – Feli Colina – Axel Fiks – Lucia Tacchetti – DJ Sky – Alejo y Valentin'
lineup_day_2 = 'The Strokes – Gwen Stefani – Armin Van Buuren – Vampire Weekend – Ratones Paranoicos – ILLENIUM – Kacey Musgraves – Litto Nebbia – Charli XCX – Jaden Smith – R3HAB – Hayley Kiyoko – El Mató a un Policía Motorizado – Rels B – Emmanuel Horvilleur – Kali Uchis – Paloma Mami – Fabiana Cantilo – Trueno – Amaia – Wallows – Yung Beef – Zoe Gotusso – Girl Ultra – Las Ligas Menores – Ainda – Ghetto Kids – D3FAI – Paco Leiva – LIMON – Louly'
lineup_day_3 = 'Guns N’ Roses – Lana del Rey – Cage the Elephant – James Blake – Alan Walker – Rezz – The Lumineers – Rex Orange County – MIKA – Perry Farrell’s Kind Heaven Orchestra – Airbag – Lauv – YSY A – Chris Lake – Pabllo Vittar – San Holo – Emilia – Idles – Masego – The Hu – Elsa y Elmar – Natalie Perez – Goldfish – Florian – Miranda Johansen – DABOW – El Buen Salvaje – Metro Live – Reydel'

Convert the string into a list of artists

In [None]:
def str_to_list(x):
    '''
    Split a lineup string into a list of artist names.

    Names are separated by an en dash ('–'); surrounding whitespace is
    stripped from each name. Empty pieces (from an empty string or a
    leading/trailing separator) are dropped, so '' gives [] rather than [''].
    '''
    names = (band.strip() for band in x.split('–'))
    return [name for name in names if name]

In [None]:
# The lineup names are rebound from str to list here. The isinstance guard
# makes the cell safe to run more than once: on a second run the values are
# already lists and are left untouched instead of failing inside str.split.
lineup_day_1, lineup_day_2, lineup_day_3 = [
    str_to_list(day) if isinstance(day, str) else day
    for day in (lineup_day_1, lineup_day_2, lineup_day_3)
]

Find each artist's ID on Spotify

In [None]:
# One search request per artist, across the three days in lineup order.
# Each element is whatever search_artist_id returns for that name.
artists_ids = list(map(
    lambda artist: search_artist_id(artist, token),
    lineup_day_1 + lineup_day_2 + lineup_day_3,
))

In [None]:
# Turn the list of (name, id) pairs into a name -> id mapping.
# dict() accepts both an iterable of pairs and an existing mapping, so this
# cell can be re-run after artists_ids has already become a dict.
artists_ids = dict(artists_ids)

Manually search for a missing artist's id

In [None]:
# Retry with a shorter query and display the result for inspection.
search_artist_id('perry farrell', token)

In [None]:
# [1] is the id element of the (name, id) tuple; it is stored under the
# artist's name exactly as written in the lineup.
artists_ids['Perry Farrell’s Kind Heaven Orchestra'] = search_artist_id('perry farrell', token)[1]

Remove "Metro Live" since it's not an artist.

In [None]:
# The None default keeps this cell re-runnable: once the key is gone, pop()
# returns None instead of raising KeyError.
artists_ids.pop('Metro Live', None)

## Building the artists DataFrame

In [None]:
# orient='index' puts the dict keys (artist names) in the index; the dict
# values (Spotify ids) become the single column 'artist_id'.
df_artists = pd.DataFrame.from_dict(artists_ids, orient='index', columns=['artist_id'])

Get info for each artist and add it to the df

In [None]:
# One API request per row. result_type='expand' turns the dict returned by
# get_artist_info into the three columns assigned on the left.
# NOTE(review): get_artist_info returns None when a request fails, and
# artist_id itself may be None for artists the search did not find — confirm
# how such rows should be handled before relying on these columns.
df_artists[['followers', 'genres', 'popularity']] = df_artists.apply(lambda x: get_artist_info(x['artist_id'], token=token),
                                                                     axis=1, result_type='expand')

In [None]:
# Each cell of 'albums' holds the value returned by get_artist_albums for
# that row: a dict of release date -> release name, or None on a failed request.
df_artists['albums'] = df_artists.apply(lambda x: get_artist_albums(x['artist_id'], token=token), axis=1)

In [None]:
# One API request per row. result_type='expand' turns the dict returned by
# get_related_artists into the two columns assigned on the left.
# NOTE(review): get_related_artists returns None when a request fails —
# confirm how such rows should be handled.
df_artists[['related_artists', 'related_genres']] = df_artists.apply(lambda x: get_related_artists(x['artist_id'], token=token),
                                                                     axis=1, result_type='expand')

Indicate on which day each artist is performing

In [None]:
# Default every artist to day 3, then overwrite the day-1 and day-2 ranges.
# Assigning the default up front replaces the former
# `df_artists['day'].fillna(3, inplace=True)`, an in-place call on a chained
# selection that pandas warns about and that does not update the frame when
# Copy-on-Write is enabled.
df_artists['day'] = 3

# Label slices are inclusive on both ends and select every row between the
# first and last artist of the day.
# NOTE(review): this assumes the index keeps the lineup order and that the
# first/last name of each day appears in the index exactly as written in the
# lineup — confirm, since index labels come from the search results.
df_artists.loc[lineup_day_1[0]:lineup_day_1[-1], 'day'] = 1
df_artists.loc[lineup_day_2[0]:lineup_day_2[-1], 'day'] = 2

In [None]:
# Preview the first rows of the assembled table.
df_artists.head()

Export to .csv

In [None]:
# The index (artist names) is written as a column labelled 'artist'.
# NOTE(review): 'genres', 'albums' and the related_* columns hold Python
# lists/dicts; presumably they are written as their string form — confirm how
# they are meant to be parsed when the file is read back.
df_artists.to_csv('artists.csv', index_label='artist')

---