In [28]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import sys
import os
import pprint

import pandas as pd
import numpy as np
import re

from tqdm.notebook import tqdm

In [29]:
## Set environment variables
os.environ['SPOTIPY_CLIENT_ID'] = ''
os.environ['SPOTIPY_CLIENT_SECRET'] = ''

In [32]:
## let's read in our DF with song id's to make API calls
spotify_df = pd.read_csv("data/dataSpotifyJW.csv") ## change to the dataSpotifyJW.csv file

In [35]:
## let's check out how many uniqued SongID's there are
len(spotify_df.SongID)

23320

In [36]:
## get an array with our SongID (we should probably get unique IDs and then match them)
song_ids = spotify_df["SongID"].values
print(f"Number of SongIDs: {len(song_ids)}\nNumber of unique IDs: {len(set(song_ids))}")

Number of SongIDs: 23320
Number of unique IDs: 23146


In [37]:
song_ids

array(['3YOhXYCLFRQxEmUlzjiWEJ', '53RfjM48r9xNywpDiog6zG',
       '73vcZCFNErwqHxuS1BXH0e', ..., '0qBxAcenqF2lwbGi0HRZpW',
       '3O9TJ95zrSqlg5Qk9BBq4V', '0xpLHUe3nyQO1ObHNJWSsB'], dtype=object)

In [38]:
## making search API call
sp = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

demo_id = "7lmeHLHBe4nmXzuXc0HDjk"
demo_id2 = "7lmeHLHBe4nmXzuXc0HDjk"
results = sp.audio_features(tracks=[demo_id, demo_id2])

In [39]:
# results[0]

In [40]:
## let's make the API call for every 100 songs, since that's the limit of tracks for a single API call

## we have some NaNs from our response on the Search API (type float), so filter those id's that are nan (keep only strings) --> we end up with 8500 id's
filtered_ids = [i for i in song_ids if type(i) == str]

## let's turn the big list of ids into chunks of 100, since the tracks API call has a max 100 ids you can pass as an argument
chunks = [filtered_ids[x:x+100] for x in range(0, len(filtered_ids), 100)]

## save the API response 
chunk_responses = []

## make the API call per chunk
for chunk in tqdm(chunks):
    sub_response = sp.audio_features(tracks=chunk)
    chunk_responses.append(sub_response)



  0%|          | 0/234 [00:00<?, ?it/s]

In [22]:
# ## gotta unpack the responses to get it into a nice dictionary key: song_id, value: track_features
# responses_dict = {}

# for n in range(len(chunk_responses)):
#     for chunk in chunk_responses[n]:
#         responses_dict[chunk["id"]] = chunk
#         # print(i[])
#         # break
#         # responses_dict[i["id"]] = i 

TypeError: 'NoneType' object is not subscriptable

In [41]:
## let's flatten our chunked list of lists
flat_chunk_responses = [v for sublist in chunk_responses for v in sublist]
flat_chunk_responses = [i for i in flat_chunk_responses if i is not None]

## create a dictionary with key:songID, value:trackFeatures
responses_dict = {}
for resp in flat_chunk_responses:
    # print(resp["id"])
    # print(resp)
    # break
    responses_dict[resp["id"]] = resp

In [42]:
len(responses_dict)

23140

In [43]:
## let's make the above dict of dicts into a nice dataframe
## we can drop the columns we don't want from here
track_features_df = pd.DataFrame(responses_dict).T

In [44]:
track_features_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
3YOhXYCLFRQxEmUlzjiWEJ,0.537,0.81,8,-6.213,0,0.129,0.000709,0.892,0.0579,0.667,134.096,audio_features,3YOhXYCLFRQxEmUlzjiWEJ,spotify:track:3YOhXYCLFRQxEmUlzjiWEJ,https://api.spotify.com/v1/tracks/3YOhXYCLFRQx...,https://api.spotify.com/v1/audio-analysis/3YOh...,136933,4
53RfjM48r9xNywpDiog6zG,0.342,0.377,11,-13.728,1,0.0401,0.943,0.106,0.122,0.506,76.672,audio_features,53RfjM48r9xNywpDiog6zG,spotify:track:53RfjM48r9xNywpDiog6zG,https://api.spotify.com/v1/tracks/53RfjM48r9xN...,https://api.spotify.com/v1/audio-analysis/53Rf...,256827,3
73vcZCFNErwqHxuS1BXH0e,0.663,0.614,2,-7.712,1,0.249,0.0771,0.00689,0.296,0.248,82.06,audio_features,73vcZCFNErwqHxuS1BXH0e,spotify:track:73vcZCFNErwqHxuS1BXH0e,https://api.spotify.com/v1/tracks/73vcZCFNErwq...,https://api.spotify.com/v1/audio-analysis/73vc...,87800,4
5V2GVAhUtjXwEfYNUjDUyz,0.163,0.262,1,-9.781,1,0.0306,0.792,0.167,0.116,0.278,170.527,audio_features,5V2GVAhUtjXwEfYNUjDUyz,spotify:track:5V2GVAhUtjXwEfYNUjDUyz,https://api.spotify.com/v1/tracks/5V2GVAhUtjXw...,https://api.spotify.com/v1/audio-analysis/5V2G...,205347,4
0Qdy0Vu9xir8mjc6iQ6vTA,0.262,0.753,9,-11.859,1,0.0987,0.33,9e-06,0.718,0.087,138.553,audio_features,0Qdy0Vu9xir8mjc6iQ6vTA,spotify:track:0Qdy0Vu9xir8mjc6iQ6vTA,https://api.spotify.com/v1/tracks/0Qdy0Vu9xir8...,https://api.spotify.com/v1/audio-analysis/0Qdy...,101947,4


In [45]:
track_features_df.shape

(23140, 18)

In [46]:
## let's write it to a CSV
# track_features_df.to_csv("data/dataTracks.csv", index=False)