# Spotify audio features collection for a given list of track IDs

## Import Modules and Set Constants

In [19]:
import os
import pandas as pd
import helpers.spotify_web_api as spotify
import pickle

# Data directories, expressed relative to the current working directory.
DIR_DATA = os.path.join('..', 'data')
DIR_RAW = os.path.join(DIR_DATA, 'raw')
DIR_PROCESSED = os.path.join(DIR_DATA, 'processed')

# CSV file the collected audio features are written to / read from.
FILENAME_OUT = 'track_audio_features.csv'
PATH_OUT = os.path.join(DIR_RAW, FILENAME_OUT)

# Pickle file holding the track IDs to look up.
FILENAME_TRACKLIST = 'unique_track_uris50000.pkl'
PATH_TRACKLIST = os.path.join(DIR_PROCESSED, FILENAME_TRACKLIST)

# Table layout: one index column followed by one column per audio feature.
# The order of AUDIO_FEATURES defines the column order of the table.
COL_INDEX = 'id'
AUDIO_FEATURES = [
    'tempo',
    'key',
    'mode',
    'loudness',
    'danceability',
    'energy',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'duration_ms',
    'time_signature',
]
COL_NAMES = [COL_INDEX, *AUDIO_FEATURES]

# Request batching and throttling. Both timeouts are passed to time.sleep,
# i.e. they are in seconds.
SIZE_CHUNKS = 100   # max 100
TIMEOUT_REQUESTS = 1          # pause after every successful batch
TIMEOUT_BAD_RESPONSE = 60     # pause after an HTTP error before retrying


## Create/Load Table

Read existing table:

In [None]:
# Load the previously saved feature table from PATH_OUT and use the
# COL_INDEX column as the row index.
df = pd.read_csv(PATH_OUT).set_index(COL_INDEX)
display(df)

Create new table:

In [2]:
# Start from an empty table with one column per entry in COL_NAMES.
df = pd.DataFrame(columns=COL_NAMES)
# Index by the shared COL_INDEX constant (rather than a hard-coded 'id') so
# this cell stays consistent with the "read existing table" cell and with
# COL_NAMES if the index column is ever renamed.
df.set_index(COL_INDEX, inplace=True)

display(df)

## Load Tracklist

In [None]:
# Unpickle the collection of track IDs stored at PATH_TRACKLIST.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load tracklists that come from a trusted source.
with open(PATH_TRACKLIST, mode='rb') as tracklist_fh:
    track_ids = pickle.load(tracklist_fh)

print(f'Loaded {len(track_ids)} track IDs!')

## Get Audio Features
Request audio features in batches of `SIZE_CHUNKS` track IDs and add each response to the table.

In [None]:
import time
import requests

# Position in track_ids at which collection starts. The value is a
# hard-coded offset -- presumably left over from resuming an interrupted
# run; set it to 0 to collect from the beginning.
i = 38150
while i < len(track_ids):
    track_ids_chunk = track_ids[i:i + SIZE_CHUNKS]

    # Guard only the API call, so that errors raised while updating the
    # table are never mistaken for (and retried as) request failures.
    try:
        features_chunk = spotify.get_track_features(track_ids_chunk)
    except requests.HTTPError as err:
        # Print the exception itself: ``err.strerror`` is only populated for
        # OSErrors built from an (errno, message) pair and is normally None
        # for an HTTPError, which hid the actual failure reason.
        print(f'HTTP Error at {i}: {err}', end='\r')
        # Back off, then retry the same chunk (i is deliberately not advanced).
        time.sleep(TIMEOUT_BAD_RESPONSE)
        continue

    print(f'{i:7d} - {i + SIZE_CHUNKS:7d} : {features_chunk}', end='\r')

    for track_features in features_chunk['audio_features']:
        # Entries can be None; those tracks are skipped.
        if track_features is None:
            continue
        # Emit exactly one value per column, in AUDIO_FEATURES order. A
        # feature missing from the response becomes None instead of being
        # dropped, which would shift or break the row assignment.
        df.loc[track_features['id']] = [
            track_features.get(feat) for feat in AUDIO_FEATURES
        ]

    # Throttle between successful requests before moving to the next chunk.
    time.sleep(TIMEOUT_REQUESTS)
    i += SIZE_CHUNKS

display(df)

## Save Results to File
**Warning: overwrites existing file!**

In [16]:
# Write the whole table (index included) to PATH_OUT, replacing any file
# already there.
df.to_csv(path_or_buf=PATH_OUT)

## Misc


In [None]:
# Spot-check: request audio features for two hand-picked track IDs. The call
# stays the last expression so the notebook shows its result.
sample_track_ids = ['40NRm1ZLvZpUSCUXAGGZ8J', '0PYX7X0wTBT5WvKzNCWtk2']
spotify.get_track_features(sample_track_ids)

In [None]:
# Show the current table followed by its number of rows.
display(df)
print(len(df.index))

In [None]:
# Request audio features for the first 100 track IDs and keep the raw
# response around for inspection.
first_track_ids = track_ids[:100]
features_batch = spotify.get_track_features(first_track_ids)

In [None]:
# Report how many track IDs are loaded.
track_count = len(track_ids)
print(track_count)