In [27]:
import os
import pandas as pd
import helpers.spotify_web_api as spotify
import pickle
import time

# --- Filesystem layout (relative to the notebook's working directory) ---
DIR_DATA = os.path.join('..', 'data')
DIR_RAW = os.path.join(DIR_DATA, 'raw')
DIR_PROCESSED = os.path.join(DIR_DATA, 'processed')

# Input: track list file, located in the processed-data directory.
FILENAME_TRACKLIST = 'unique_track_uris50000.pkl'
PATH_TRACKLIST = os.path.join(DIR_PROCESSED, FILENAME_TRACKLIST)

# Output: audio-features table, located in the raw-data directory.
FILENAME_OUT = 'track_audio_features.csv'
PATH_OUT = os.path.join(DIR_RAW, FILENAME_OUT)

# --- Table schema ---
# Feature columns, in the order they appear in the table.
AUDIO_FEATURES = [
    'tempo',
    'key',
    'mode',
    'loudness',
    'danceability',
    'energy',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'duration_ms',
    'time_signature',
]
COL_INDEX = 'id'
# Full column list: the index column first, then every feature column.
COL_NAMES = [COL_INDEX, *AUDIO_FEATURES]

## Table Initialization
Run exactly one of the two cells below: either read the existing table from disk so that it can be extended, or create a new, empty one.

Read existing table:

In [8]:
# Load the saved feature table from PATH_OUT and use the COL_INDEX column
# as the row index.
df = pd.read_csv(PATH_OUT).set_index(COL_INDEX)
display(df)

Unnamed: 0_level_0,tempo,key,mode,loudness,danceability,energy,speechiness,acousticness,instrumentalness,liveness,valence,duration_ms,time_signature
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
40NRm1ZLvZpUSCUXAGGZ8J,test,test,test,test,test,test,test,test,test,test,test,test,test
40NRm1ZLvZpUSCUXAGGZ8A,test,test,test,test,test,test,test,test,test,test,test,test,test


Create new table:

In [10]:
# Start from an empty table with one column per entry of COL_NAMES, then
# promote the COL_INDEX column to the row index. COL_INDEX is used instead of
# a literal 'id' so this cell stays in sync with the schema constants and
# with the cell that reads an existing table.
df = pd.DataFrame(columns=COL_NAMES).set_index(COL_INDEX)

## Song List Loading

In [8]:
# Unpickle the track list stored at PATH_TRACKLIST into `track_ids`.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load track lists produced by this project.
with open(PATH_TRACKLIST, mode='rb') as fh:
    track_ids = pickle.load(fh)

# Uncomment to peek at the first 40 entries:
# print(track_ids[:40])

['3zyYVItCMCjFzBHTyjrxPK', '4jSy0HTIoC9yiwZ8OVyTCW', '4zyqBSUFNkJ20mw1FB68gt', '63B3TtwUzOoJoe3unMteVa', '7y9iMe8SOB6z3NoHE2OfXl', '019PtBbTsInhXDfJvcOwl1', '2SLLtfoSOql3QgPVNvqnfQ', '4rFzs2vXOyeDVsJfH4osbq', '4UDUsHgTcM8gAxpYOE6nuU', '4C4LzdHAPB3telqKE3lZVQ', '0hJQViw2kz8FieKYHbE0e9', '2nUGM4Y3rxzJeaANsUSP9A', '3JZHwlhbFZ92TZ48stSEKx', '33dDgCrj6GzT5l3YO4CKFT', '0KZK8rrdzRza3hSvpS2jZq', '2TCX9sbjN34JxoWZsyAT2F', '74uRgVFP1Y6urfHMMo5XXw', '0XGmqIqDSQP6s8szzG2RVg', '6UfWrUOBS6ZGnCg5ZLuM9c', '2yt1rI63hHieGFgdnPzKzN', '6WD2t53hdZH6TbQmlzuuly', '77RGwvPkjh26sy1KS2Xcyb', '1xQTwvklGpxcRrXVo4WgpI', '5nNVUJaPFtJEni4RK5ZS3U', '2fMCv3yixOLxHX1VIkJkSi', '34v7hVx2T6fkCbj4Wt0JDY', '6ODrccdAEWWxgXFNBFSfAv', '5AHFmcaxctrp0c2Ezo6eXn', '1OSEBkpdq1U2dy4I0rGg1Z', '513Fp1fXzZ3MzVIDng28wL', '0bWfxPhxOepDV01eMECRYl', '3OFVz4ve2BoZktajhFhpTY', '7u1c2lUbbiEOC3sqK1Hozc', '5q3lFZYp9HLaXATffQSgcj', '0DQmhkaUy153zu6qdoBvB6', '6BqK0QbNZTO0NA06pt7Kzc', '0w9Z2dfd8yHNq5UH09VYsK', '5R4M7bwrcA7j2DCeXCl9lA', '7twnPdy5aD

## Actual Work

In [40]:
start = 1000  # index into track_ids at which fetching (re)starts

REQUEST_PAUSE_S = 0.3        # fixed pause before every request
MAX_RATE_LIMIT_RETRIES = 5   # retries per track after an HTTP 429
FALLBACK_BACKOFF_S = 5.0     # base wait when a 429 carries no usable Retry-After


def _fetch_track_features(track_id):
    """Fetch the audio features of one track, retrying when rate-limited.

    Calls ``spotify.get_track_features(track_id)``. If the call raises an
    exception that carries a ``response`` with ``status_code == 429``, the
    request is retried up to ``MAX_RATE_LIMIT_RETRIES`` times. The wait before
    each retry is the numeric ``Retry-After`` response header when present,
    otherwise an exponentially growing fallback.

    NOTE(review): the 429 detection is duck-typed on ``err.response`` because
    the helper's exception type is not visible here; the traceback text looks
    like a requests-style HTTPError -- confirm against the helper module.

    Args:
        track_id: Track id passed through to the helper.

    Returns:
        Whatever ``spotify.get_track_features`` returns for the track.

    Raises:
        Exception: Any non-429 error is re-raised unchanged, as is the last
            429 error once the retries are exhausted.
    """
    for attempt in range(MAX_RATE_LIMIT_RETRIES + 1):
        try:
            return spotify.get_track_features(track_id)
        except Exception as err:
            response = getattr(err, 'response', None)
            rate_limited = getattr(response, 'status_code', None) == 429
            if not rate_limited or attempt == MAX_RATE_LIMIT_RETRIES:
                raise
            headers = getattr(response, 'headers', None) or {}
            try:
                wait_s = float(headers.get('Retry-After'))
            except (TypeError, ValueError):
                # Header missing or not a plain number of seconds.
                wait_s = FALLBACK_BACKOFF_S * 2 ** attempt
            time.sleep(max(wait_s, 0.0))


# Fetch the features of every track from `start` onward and store one row per
# track in `df`, keyed by the id reported in the response.
for i in range(start, len(track_ids)):
    track_id = track_ids[i]  # named track_id so the builtin `id` is not shadowed
    print(f'{i:10d}: {track_id}', end='\r')
    time.sleep(REQUEST_PAUSE_S)
    features_json = _fetch_track_features(track_id)
    # Always emit exactly one value per column: a feature missing from the
    # response becomes None instead of shortening the row, which would no
    # longer line up with the table's columns.
    df.loc[features_json['id']] = [features_json.get(feat) for feat in AUDIO_FEATURES]

display(df)

      1000: 18OiohNe6IoXP6kwYtVyQc

HTTPError: 429 Client Error: Too Many Requests for url: https://api.spotify.com/v1/audio-features/18OiohNe6IoXP6kwYtVyQc

## Saving to File

In [31]:
# Persist the feature table as CSV at PATH_OUT. pandas writes the row index
# by default, so the index ends up as the first column of the file.
df.to_csv(PATH_OUT)

## Misc


In [39]:
# Show the current contents of the feature table.
display(df)

Unnamed: 0_level_0,tempo,key,mode,loudness,danceability,energy,speechiness,acousticness,instrumentalness,liveness,valence,duration_ms,time_signature
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
3zyYVItCMCjFzBHTyjrxPK,79.227,8.0,1.0,-3.406,0.4660,0.8560,0.3180,0.257000,0.000000,0.6750,0.5310,292223.0,4.0
4jSy0HTIoC9yiwZ8OVyTCW,164.207,1.0,1.0,-7.972,0.5100,0.8490,0.1190,0.000546,0.000748,0.6890,0.8870,315067.0,4.0
4zyqBSUFNkJ20mw1FB68gt,83.947,4.0,0.0,-22.867,0.3080,0.1140,0.0321,0.958000,0.902000,0.0853,0.3030,350906.0,4.0
63B3TtwUzOoJoe3unMteVa,93.696,1.0,0.0,-4.166,0.6600,0.9430,0.2770,0.129000,0.000000,0.5570,0.5990,210733.0,4.0
7y9iMe8SOB6z3NoHE2OfXl,118.384,0.0,1.0,-3.539,0.6750,0.7510,0.0296,0.060400,0.000000,0.0893,0.6120,181279.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6Ao4IpHj2vgru3DiGhguRE,163.998,3.0,1.0,-2.288,0.3190,0.9070,0.0628,0.012400,0.000000,0.0705,0.4640,214347.0,4.0
4rXGclWtd3LiD8AcBk7cEt,45.876,10.0,1.0,-28.012,0.0783,0.0411,0.0411,0.045800,0.822000,0.1200,0.0354,249962.0,4.0
70CUNbJ3qoKPAySD5qGnx8,91.845,4.0,0.0,-14.412,0.5190,0.4640,0.1280,0.184000,0.000176,0.4860,0.5910,74507.0,4.0
01xVIxahT2Z5YQTojiUPf8,118.004,0.0,1.0,-5.817,0.6010,0.8240,0.0280,0.000541,0.000007,0.2380,0.7350,219027.0,4.0
