In [1]:
import pandas as pd
import numpy as np
import requests
import time

import warnings
warnings.filterwarnings('ignore')

In [2]:
#Importing credentials
from credentials import s_key, ss_key

In [3]:
#Spotify API help from https://github.com/kylepw/spotify-api-auth-examples/blob/master/client/app.py
def auth(key, sec):
    
    authorize = 'https://accounts.spotify.com/api/token'
    param = {
    "Content-Type": "application/x-www-form-urlencoded",
    'grant_type' : 'client_credentials'
    }
    res = requests.post(authorize, auth = (s_key, ss_key), data = param)
    token = res.json()['access_token']
    
    return token

In [4]:
token = auth(s_key, ss_key)

In [5]:
sync = pd.read_csv('./data/sync_spotify_uri.csv')

In [6]:
sync['s_dance'] = 0
sync['s_energy'] = 0 
sync['s_key'] = 0 
sync['s_loudness'] = 0 
sync['s_mode'] = 0
sync['s_speech'] = 0
sync['s_acoustic'] = 0 
sync['s_inst'] = 0
sync['s_live'] = 0 
sync['s_valence'] = 0 
sync['s_tempo'] = 0 
sync['s_duration'] = 0
sync['s_time_sig'] = 0

In [7]:
def get_features(df):
    #Set up empty counter and total songs to get
    count = 0 
    total = len(df)
    
    token = auth(s_key, ss_key)
    header = {'Authorization' : f'Bearer {token}'}
    
    #For each URI
    for i in range(len(df)):
        count += 1
        
        if df.loc[i, 's_uri']:
        
            try:
                uri = df.loc[i, 's_uri']
                res = requests.get(f'https://api.spotify.com/v1/audio-features/{uri}', headers = header) 
                status = res.status_code
                
                if res.status_code == 200:
                    results = res.json()

                    #These are all the features to save:
                    dance = results['danceability']
                    energy = results['energy']
                    key = results['key']
                    loudness = results['loudness']
                    mode = results['mode']
                    speech = results['speechiness']
                    acoustic = results['acousticness']
                    inst = results['instrumentalness']
                    live = results['liveness']
                    valence = results['valence']
                    tempo = results['tempo']
                    duration = results['duration_ms']
                    time_sig = results['time_signature']
                else:
                    #Try reauthenticating
                    token = auth(s_key, ss_key)
                    uri = df.loc[i, 's_uri']
                    res = requests.get(f'https://api.spotify.com/v1/audio-features/{uri}', headers = header) 

                    if res.status_code == 200:
                        results = res.json()

                        #These are all the features to save:
                        dance = results['danceability']
                        energy = results['energy']
                        key = results['key']
                        loudness = results['loudness']
                        mode = results['mode']
                        speech = results['speechiness']
                        acoustic = results['acousticness']
                        inst = results['instrumentalness']
                        live = results['liveness']
                        valence = results['valence']
                        tempo = results['tempo']
                        duration = results['duration_ms']
                        time_sig = results['time_signature']

            except:
                print(f'Could not gather row {i}, status code {status}')
                #Insert NaNs instead:
                dance = np.nan
                energy = np.nan
                key = np.nan
                loudness = np.nan
                mode = np.nan
                speech = np.nan
                acoustic = np.nan
                inst = np.nan
                live = np.nan
                valence = np.nan
                tempo = np.nan
                duration = np.nan
                time_sig = np.nan
        else:
            print(f'No URI for row {i}')
        #Insert NaNs instead:
            dance = np.nan
            energy = np.nan
            key = np.nan
            loudness = np.nan
            mode = np.nan
            speech = np.nan
            acoustic = np.nan
            inst = np.nan
            live = np.nan
            valence = np.nan
            tempo = np.nan
            duration = np.nan
            time_sig = np.nan
                
        df.loc[i, 's_dance'] = dance
        df.loc[i, 's_energy'] = energy 
        df.loc[i, 's_key'] = key 
        df.loc[i, 's_loudness'] = loudness 
        df.loc[i, 's_mode'] = mode
        df.loc[i, 's_speech'] = speech
        df.loc[i, 's_acoustic'] = acoustic 
        df.loc[i, 's_inst'] = inst
        df.loc[i, 's_live'] = live 
        df.loc[i, 's_valence'] = valence 
        df.loc[i, 's_tempo'] = tempo 
        df.loc[i, 's_duration'] = duration
        df.loc[i, 's_time_sig'] = time_sig
               
        if count % 100 == 0:
               print(f'{count} songs gathered out of {total}')
        time.sleep(1)
               
    return df

In [8]:
sync = get_features(sync)

100 songs gathered out of 10845
200 songs gathered out of 10845
300 songs gathered out of 10845
400 songs gathered out of 10845
500 songs gathered out of 10845
600 songs gathered out of 10845
700 songs gathered out of 10845
800 songs gathered out of 10845
900 songs gathered out of 10845
1000 songs gathered out of 10845
1100 songs gathered out of 10845
1200 songs gathered out of 10845
1300 songs gathered out of 10845
1400 songs gathered out of 10845
1500 songs gathered out of 10845
1600 songs gathered out of 10845
1700 songs gathered out of 10845
1800 songs gathered out of 10845
Could not gather row 1847, status code 200
1900 songs gathered out of 10845
2000 songs gathered out of 10845
2100 songs gathered out of 10845
2200 songs gathered out of 10845
2300 songs gathered out of 10845
2400 songs gathered out of 10845
2500 songs gathered out of 10845
Could not gather row 2576, status code 200
2600 songs gathered out of 10845
2700 songs gathered out of 10845
2800 songs gathered out of 10845

In [9]:
sync.isna().sum()

index              0
title              0
artist             0
year               0
explicit           0
styles             0
languages          0
title_artist       0
synced             0
d_id               0
d_song             0
d_isrc             0
d_release          0
d_explicit         0
d_bpm              0
d_artist           0
d_album_id         0
d_album            0
d_art              0
lyric_url         10
l_title            0
l_artist           0
l_album            0
l_writer           0
l_pub              0
s_artist          19
s_track           19
s_uri             19
s_dance         4123
s_energy        4123
s_key           4123
s_loudness      4123
s_mode          4123
s_speech        4123
s_acoustic      4123
s_inst          4123
s_live          4123
s_valence       4123
s_tempo         4123
s_duration      4123
s_time_sig      4123
dtype: int64

In [10]:
sync.tail()

Unnamed: 0,index,title,artist,year,explicit,styles,languages,title_artist,synced,d_id,...,s_loudness,s_mode,s_speech,s_acoustic,s_inst,s_live,s_valence,s_tempo,s_duration,s_time_sig
10840,14685,Fixing A Hole,The Beatles,1967,0,Rock,English,fixing a hole - the beatles,0,116348678,...,,,,,,,,,,
10841,14686,It Came Upon a Midnight Clear,Frank Sinatra,1948,0,"Christmas,Christian,Traditionnal",English,it came upon a midnight clear - frank sinatra,0,115007404,...,,,,,,,,,,
10842,14688,Beautiful War,Kings of Leon,2013,0,"Alternative,Rock",English,beautiful war - kings of leon,0,70584821,...,,,,,,,,,,
10843,14689,Midnight Blues,Gary Moore,1990,0,"Blues,Rock",English,midnight blues - gary moore,0,3133096,...,,,,,,,,,,
10844,14690,Smokin' and Drinkin',Miranda Lambert,2014,0,"Pop,Country,Soft rock",English,smokin' and drinkin' - miranda lambert,0,78383556,...,,,,,,,,,,


In [11]:
take2 = sync[sync['s_dance'].isna()]

In [12]:
sync = sync[sync['s_dance'].notna()]

In [13]:
sync.to_csv('./data/sync_spotify_final_1.csv', index = False)

In [14]:
take2.head(1)

Unnamed: 0,index,title,artist,year,explicit,styles,languages,title_artist,synced,d_id,...,s_loudness,s_mode,s_speech,s_acoustic,s_inst,s_live,s_valence,s_tempo,s_duration,s_time_sig
1847,2066,Georgy Girl,The Seekers,1967,0,"Pop,Oldies,Folk",English,georgy girl - the seekers,1,965282572,...,,,,,,,,,,


In [15]:
take2 = take2.reset_index()

In [16]:
take2 = get_features(take2)

100 songs gathered out of 4123
200 songs gathered out of 4123
300 songs gathered out of 4123
400 songs gathered out of 4123
500 songs gathered out of 4123
600 songs gathered out of 4123
700 songs gathered out of 4123
800 songs gathered out of 4123
900 songs gathered out of 4123
1000 songs gathered out of 4123
1100 songs gathered out of 4123
1200 songs gathered out of 4123
1300 songs gathered out of 4123
1400 songs gathered out of 4123
1500 songs gathered out of 4123
1600 songs gathered out of 4123
1700 songs gathered out of 4123
1800 songs gathered out of 4123
1900 songs gathered out of 4123
2000 songs gathered out of 4123
2100 songs gathered out of 4123
2200 songs gathered out of 4123
2300 songs gathered out of 4123
2400 songs gathered out of 4123
2500 songs gathered out of 4123
2600 songs gathered out of 4123
2700 songs gathered out of 4123
2800 songs gathered out of 4123
2900 songs gathered out of 4123
3000 songs gathered out of 4123
3100 songs gathered out of 4123
3200 songs gather

In [17]:
take2['s_uri']

0       73XQBWHSVLL1939hb8cQTm
1       73tGO4JJKrMYtjCbq1v8Oa
2       4yyQekWpi9QPAb3c0AcTBs
3       2ia7iiEtpiOL2ZVuWxBZGB
4       4IBuAtdsM6psUgfhHgoy4e
                 ...          
4118    3pKKxkeB1pOUMHwWBmKc3Y
4119    55YzKJipEDxzduTSrAWtMj
4120    3iKmdCDZK4XmqxIA9fVVGs
4121    2n5LevrNK3JbN1WWtXuLD6
4122    13keyz9ikBe6ZpRasw7l4X
Name: s_uri, Length: 4123, dtype: object

In [18]:
take2.isna().sum()

level_0          0
index            0
title            0
artist           0
year             0
explicit         0
styles           0
languages        0
title_artist     0
synced           0
d_id             0
d_song           0
d_isrc           0
d_release        0
d_explicit       0
d_bpm            0
d_artist         0
d_album_id       0
d_album          0
d_art            0
lyric_url        0
l_title          0
l_artist         0
l_album          0
l_writer         0
l_pub            0
s_artist        11
s_track         11
s_uri           11
s_dance          0
s_energy         0
s_key            0
s_loudness       0
s_mode           0
s_speech         0
s_acoustic       0
s_inst           0
s_live           0
s_valence        0
s_tempo          0
s_duration       0
s_time_sig       0
dtype: int64

In [19]:
take3 = take2[take2['s_dance'].isna()]
take2 = take2[take2['s_dance'].notna()]

In [20]:
take2.to_csv('./data/sync_spotify_final_2.csv', index = False)