In [1]:
# adapted from https://stmorse.github.io/journal/spotify-api.html and 
# https://github.com/ZipBomb/spotify-song-suggestion/blob/master/random_song.py
# popularity values are as of Nov 23, 2020

In [4]:
import base64
import json
import os
import random
import urllib

import pandas as pd
import requests
from tqdm import tqdm

# Spotify API credentials are read from the environment so that secrets are
# never stored in the notebook. Set SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET
# before starting the kernel; a missing variable raises KeyError here.
# NOTE(review): the credentials that were previously hardcoded in this cell
# should be treated as leaked and rotated.
CLIENT_ID = os.environ['SPOTIFY_CLIENT_ID']
CLIENT_SECRET = os.environ['SPOTIFY_CLIENT_SECRET']

# Search vocabulary: the first column of word_list.txt (no header row),
# used as the pool of random search terms.
words_txt = pd.read_csv('word_list.txt',header=None)
word_list = words_txt[0].to_list()

# Spotify API URIs
AUTH_URL = 'https://accounts.spotify.com/api/token'
BASE_URL = 'https://api.spotify.com/v1/'

# POST the client-credentials grant to obtain an access token
auth_response = requests.post(AUTH_URL, {
    'grant_type': 'client_credentials',
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
})

# Fail loudly on a rejected request instead of hitting a KeyError on
# 'access_token' further down.
auth_response.raise_for_status()

# convert the response to JSON
auth_response_data = auth_response.json()

# save the access token
access_token = auth_response_data['access_token']

# Authorization header sent with every subsequent API request
headers = {
    'Authorization': 'Bearer {token}'.format(token=access_token)
}

In [3]:
def get_song(number):
    """Sample random tracks through the Spotify search endpoint.

    A random word from the module-level ``word_list`` is used as the search
    term, a random result offset is requested, and one track is picked at
    random from the returned page. This repeats until ``number`` tracks have
    been collected.

    input: number of songs requested
    output: df with columns 'track', 'artist', 'id' and 'search term'

    Relies on the module-level ``BASE_URL``, ``headers`` and ``word_list``.

    NOTE(review): if every request keeps failing, the outer loop never
    terminates, because only successful lookups advance the count.
    """
    columns = ['track', 'artist', 'id', 'search term']
    rows = []

    # The context manager closes the progress bar even if the loop is interrupted.
    with tqdm(total=number, position=0, leave=True) as pbar:
        while len(rows) < number:

            # search term for random search
            search_term = random.choice(word_list)
            found = None

            # Cap the max number of requests per search term
            for _ in range(10):
                try:
                    # `params` lets requests URL-encode the search term.
                    song_request = requests.get(
                        BASE_URL + 'search',
                        params={
                            'q': search_term,
                            'type': 'track',
                            'offset': random.randint(0, 200),
                        },
                        headers=headers,
                    )
                    song_info = random.choice(
                        json.loads(song_request.text)['tracks']['items']
                    )
                    found = {
                        'track': song_info['name'],
                        'artist': song_info['artists'][0]['name'],
                        'id': song_info['id'],
                        'search term': search_term,
                    }
                    break
                # Not all words will bring up a song. Only the expected
                # failure modes are caught, so Ctrl-C still stops the loop.
                except (requests.RequestException, ValueError, KeyError,
                        IndexError, TypeError):
                    continue

            if found and found['id']:  # only keep valid songs
                rows.append(found)
                pbar.update(1)

    # Build the frame once; growing a DataFrame row by row is quadratic and
    # DataFrame.append no longer exists in pandas >= 2.0.
    return pd.DataFrame(rows, columns=columns)

In [4]:
def get_traits(df):
    """Fetch Spotify audio features for every track in ``df``.

    input: df with columns 'artist', 'track' and 'id' (Spotify track id)
    output: tuple ``(attr_df, error_ls)`` where ``attr_df`` has one row per
        successfully fetched track (artist, track, id plus the audio-feature
        columns) and ``error_ls`` lists the ids whose lookup failed.

    Relies on the module-level ``BASE_URL`` and ``headers``.
    """
    feature_cols = ['danceability', 'energy', 'key', 'loudness', 'mode',
                    'speechiness', 'acousticness', 'instrumentalness',
                    'liveness', 'valence', 'tempo', 'duration_ms']
    columns = ['artist', 'track', 'id'] + feature_cols

    rows = []
    error_ls = []

    with tqdm(total=len(df), position=0, leave=True) as pbar:
        # Iterate by position so a non-default index (e.g. a filtered frame)
        # is handled correctly.
        for artist, track, track_id in zip(df['artist'], df['track'], df['id']):
            try:
                r = requests.get(BASE_URL + 'audio-features/' + track_id,
                                 headers=headers)
                r = r.json()
                rows.append([artist, track, track_id] + [r[col] for col in feature_cols])
            # Best effort: record the failing id and move on. The expected
            # failures are network errors, a non-JSON body, a payload without
            # the feature keys, or a non-string id.
            except (requests.RequestException, ValueError, KeyError, TypeError):
                error_ls.append(track_id)
            finally:
                pbar.update(1)

    # Build the frame once; DataFrame.append no longer exists in pandas >= 2.0.
    attr_df = pd.DataFrame(rows, columns=columns)
    return attr_df, error_ls

In [5]:
def get_tracks_info(df):
    """Fetch popularity, artist and album info for every track in ``df``.

    input: df with song ids in column 'id'
    output: tuple ``(attr_df, error_ls)`` where ``attr_df`` has the columns
        'id', 'popularity', 'artist dict' (the response's 'artists' value) and
        'album blob' (the response's 'album' value), and ``error_ls`` lists
        the ids whose lookup failed.

    Relies on the module-level ``BASE_URL`` and ``headers``.
    """
    columns = ['id', 'popularity', 'artist dict', 'album blob']
    rows = []
    error_ls = []

    with tqdm(total=len(df), position=0, leave=True) as pbar:
        # Iterate over values so a non-default index is handled correctly.
        for track_id in df['id']:
            try:
                r = requests.get(BASE_URL + 'tracks/' + track_id, headers=headers)
                r = r.json()
                rows.append([track_id, r['popularity'], r['artists'], r['album']])
            # Best effort: record the failing id and keep going.
            except (requests.RequestException, ValueError, KeyError, TypeError):
                error_ls.append(track_id)
            finally:
                pbar.update(1)

    # Build the frame once; DataFrame.append no longer exists in pandas >= 2.0.
    attr_df = pd.DataFrame(rows, columns=columns)
    return attr_df, error_ls

In [6]:
def format_artist(df):
    """Split each track's artist entries into parallel name and id lists.

    input: df with columns 'id' (song id) and 'artist dict' (a sequence of
        dicts, each with at least the keys 'id' and 'name')
    output: df with columns 'id', 'artist name ls' and 'artist id ls', one
        row per input row, in input order, with a fresh 0..n-1 index
    """
    columns = ['id', 'artist name ls', 'artist id ls']

    # Iterate over values (not index labels) so frames with a non-default
    # index work, and build the result once instead of appending row by row
    # (DataFrame.append no longer exists in pandas >= 2.0).
    rows = [
        {
            'id': track_id,
            'artist name ls': [artist['name'] for artist in artist_dict],
            'artist id ls': [artist['id'] for artist in artist_dict],
        }
        for track_id, artist_dict in zip(df['id'], df['artist dict'])
    ]

    return pd.DataFrame(rows, columns=columns)

In [8]:
def get_artist_info(df):
    """Request the popularity of every artist on every track from the API.

    input: df with columns 'id' (song id) and 'artist id ls' (list of artist
        ids for that song)
    output: tuple ``(attr_df, error_ls)`` where ``attr_df`` has the columns
        'id' and 'artist popularity' (list of the popularity values that
        could be fetched for that song's artists) and ``error_ls`` holds the
        song id once for every artist lookup that failed.

    Relies on the module-level ``BASE_URL`` and ``headers``.
    """
    columns = ['id', 'artist popularity']
    rows = []
    error_ls = []

    # One progress tick per artist request. The previous total was short by one.
    num_artists = sum(len(artist_ls) for artist_ls in df['artist id ls'])

    with tqdm(total=num_artists, position=0, leave=True) as pbar:
        # Iterate over values so a non-default index is handled correctly.
        for track_id, artist_ls in zip(df['id'], df['artist id ls']):
            popularity = []

            for artist_id in artist_ls:
                try:
                    r = requests.get(BASE_URL + 'artists/' + artist_id, headers=headers)
                    r = r.json()
                    popularity.append(r['popularity'])
                # Best effort: remember which song had a failing artist lookup.
                except (requests.RequestException, ValueError, KeyError, TypeError):
                    error_ls.append(track_id)
                finally:
                    pbar.update(1)

            rows.append({'id': track_id, 'artist popularity': popularity})

    # Build the frame once; DataFrame.append no longer exists in pandas >= 2.0.
    attr_df = pd.DataFrame(rows, columns=columns)
    return attr_df, error_ls

In [7]:
def get_release(blob):
    """Return the value stored under the 'release_date' key of ``blob``."""
    release_date = blob['release_date']
    return release_date

In [11]:
# The track sample is loaded from a saved CSV (first column used as the
# index); the API sampling call that would regenerate it is left commented out.
# songs = get_song(10000)
songs = pd.read_csv('songsnov23.csv', index_col = 0)

In [10]:
# Load the saved table from traitsnov23.csv, using its first column as the index.
# NOTE(review): presumably the saved output of get_traits -- confirm.
traits = pd.read_csv('traitsnov23.csv', index_col = 0)

In [None]:
# Fetch popularity / artist / album info for every track in `songs`;
# `info_err` collects the ids whose lookup failed, and the cell displays how many there were.
info, info_err = get_tracks_info(songs)
len(info_err)

  5%|▍         | 452/10000 [00:51<23:55,  6.65it/s]

In [43]:
# Turn each track's 'artist dict' entry into parallel lists of artist names and ids.
artists_format = format_artist(info)

In [None]:
# Request the popularity of every artist listed in `artists_format`;
# the cell displays the number of failed artist lookups.
artist_pop, err_artist_pop = get_artist_info(artists_format)
len(err_artist_pop)

In [39]:
# Display the per-track artist popularity table.
artist_pop

Unnamed: 0,id,artist popularity
0,0ojSU3QoovuJxwVYk34opc,[]
1,42ToYHW2koZnAqgDoEQ314,[]
2,1nfbOAPwnMRBlE0rUNGg43,[]
3,4iuthceAzx9jJt5o08HwMb,[]
4,7l1B4m3ujaWim9n3zMMhwa,[]
...,...,...
95,4nlFFXZsONdu6mjoXvi1nt,[]
96,084j8LJPfwtqPQhfGpYtlV,[]
97,1euszbbvkUzQzIhZUWFmXG,[]
98,3zJYK6ma0LyCZswOHuiqtR,[]


In [35]:
# Display the track info table.
info

Unnamed: 0,id,popularity,artist dict,album blob
0,0ojSU3QoovuJxwVYk34opc,35,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."
1,42ToYHW2koZnAqgDoEQ314,28,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."
2,1nfbOAPwnMRBlE0rUNGg43,21,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'single', 'artists': [{'externa..."
3,4iuthceAzx9jJt5o08HwMb,0,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."
4,7l1B4m3ujaWim9n3zMMhwa,4,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."
...,...,...,...,...
9620,5EnLOnBzJ9BJ0sdw0iLEJK,21,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."
9621,7kNhdjo60mRthDO1A2duAL,18,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'single', 'artists': [{'externa..."
9622,3cSIkJ1e5c5TQ4dujshOJd,21,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."
9623,1dQhoMZGp3vp2chz9O87Ht,1,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."


In [None]:
# Replace each album blob with its 'release_date' value.
# NOTE(review): this overwrites 'album blob' in place, so the column no longer
# holds the album blobs afterwards and re-running this cell will likely fail;
# consider writing to a new column instead.
info['album blob'] = info['album blob'].apply(get_release)

In [32]:
# Display the track info table.
info

Unnamed: 0,id,popularity,artist dict,album blob
0,0ojSU3QoovuJxwVYk34opc,35,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."
1,42ToYHW2koZnAqgDoEQ314,28,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."
2,1nfbOAPwnMRBlE0rUNGg43,21,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'single', 'artists': [{'externa..."
3,4iuthceAzx9jJt5o08HwMb,0,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."
4,7l1B4m3ujaWim9n3zMMhwa,4,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."
...,...,...,...,...
9620,5EnLOnBzJ9BJ0sdw0iLEJK,21,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."
9621,7kNhdjo60mRthDO1A2duAL,18,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'single', 'artists': [{'externa..."
9622,3cSIkJ1e5c5TQ4dujshOJd,21,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."
9623,1dQhoMZGp3vp2chz9O87Ht,1,[{'external_urls': {'spotify': 'https://open.s...,"{'album_type': 'album', 'artists': [{'external..."


NameError: name 'info' is not defined