# Summary:

For each track with track_id, get audio features, ISRC, and other track data. ISRC will be used to find lyrics in the next notebook.

## Input: 
 - 18074 tracks with track_id.

## Output:
 - 18049 tracks have audio features. The [13 audio features](https://developer.spotify.com/documentation/web-api/reference/#/operations/get-audio-features) are: danceability, energy, key, loudness, mode, speechiness, acousticness, instrumentalness, liveness, valence, tempo, duration_ms, time_signature.
 
 - 18074 tracks have an ISRC and other track information, including popularity and preview_url.

# Import modules and credentials

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import spotipy
import os
import sys
import re
import time 
%matplotlib inline

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

from dotenv import load_dotenv
%load_ext dotenv
%dotenv


# Spotify API credentials are taken from the environment (presumably populated
# from a .env file by the %dotenv magic above); a missing variable raises KeyError.
client_id = os.environ['SPOTIFY_CLIENT_ID']
client_secret = os.environ['SPOTIFY_CLIENT_SECRET']

# Authenticate with the client-credentials flow and create the API client
# that every later cell uses.
credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
spotify = spotipy.Spotify(client_credentials_manager=credentials_manager)

# Load Track IDs

In [2]:
# Track IDs come from two CSV files: one for album tracks, one for singles.
album_tracks_df = pd.read_csv("../data/album_tracks.csv")
single_tracks_df = pd.read_csv("../data/single_ids.csv")

# Preview the first three rows of each table (for singles, only name and ID).
display(album_tracks_df.head(3))
display(single_tracks_df.loc[:, ['Album_Name', 'Track_ID']].head(3))

# Row counts per source and combined.
n_album_tracks = len(album_tracks_df)
n_single_tracks = len(single_tracks_df)
print("Number of tracks (from albums excluding singles): ", n_album_tracks)
print("Number of tracks (from singles): ", n_single_tracks)
print("Total Number of tracks: ", n_album_tracks + n_single_tracks)

Unnamed: 0,Album_ID,Track_ID
0,3Bj9gANIphLWtsNj3mw1bI,2FPQI1LRwWszttbRG8hknk
1,3Bj9gANIphLWtsNj3mw1bI,6pOoswwC1lNBI2TapMdaEW
2,3Bj9gANIphLWtsNj3mw1bI,2EEwSq98rKwlRWT7sNCLRc


Unnamed: 0,Album_Name,Track_ID
0,"""All Is Love"" (CD single)",6SExQ9H4mX00FIhjffFM2z
1,"""Wash Your Hands"" (CD single)",5gzOuRPg48fglThPc84goP
2,"""Just Like a Rockstar"" (CD Single)",1b8NAn2ehmesKq4luLIfAc


Number of tracks (from albums excluding singles):  17507
Number of tracks (from singles):  567
Total Number of tracks:  18074


# Examples: Get audio features and track data

## Example: Get audio features 

In [3]:
# Request the audio features of one example track.
track_id = '6SExQ9H4mX00FIhjffFM2z'
audio_feature = spotify.audio_features(track_id)

# The result is indexed like a list; show the keys of its first entry,
# then the complete response.
first_entry = audio_feature[0]
print(first_entry.keys(), "\n")
print(audio_feature)


dict_keys(['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms', 'time_signature']) 

[{'danceability': 0.694, 'energy': 0.545, 'key': 0, 'loudness': -9.503, 'mode': 1, 'speechiness': 0.0377, 'acousticness': 0.311, 'instrumentalness': 0.000108, 'liveness': 0.694, 'valence': 0.285, 'tempo': 120.02, 'type': 'audio_features', 'id': '6SExQ9H4mX00FIhjffFM2z', 'uri': 'spotify:track:6SExQ9H4mX00FIhjffFM2z', 'track_href': 'https://api.spotify.com/v1/tracks/6SExQ9H4mX00FIhjffFM2z', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/6SExQ9H4mX00FIhjffFM2z', 'duration_ms': 169867, 'time_signature': 4}]


## Example: Get track data and ISRC

In [13]:
# Request the full track object of one example track.
track_id = '6SExQ9H4mX00FIhjffFM2z'
track = spotify.track(track_id)

# Available top-level keys, followed by the fields used later in this notebook.
print(track.keys(), "\n")
for field in ('external_ids', 'artists'):
    print(track[field], "\n")
print(track['name'])
print("\n")

# URL of the first image listed for the track's album.
album_image_url = track['album']['images'][0]['url']
print(album_image_url)
print("preview_url: ", track['preview_url'])

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri']) 

{'isrc': 'USUM70980425'} 

[{'external_urls': {'spotify': 'https://open.spotify.com/artist/6emHCSoB4tJxTVXakbrpPz'}, 'href': 'https://api.spotify.com/v1/artists/6emHCSoB4tJxTVXakbrpPz', 'id': '6emHCSoB4tJxTVXakbrpPz', 'name': 'Karen O', 'type': 'artist', 'uri': 'spotify:artist:6emHCSoB4tJxTVXakbrpPz'}, {'external_urls': {'spotify': 'https://open.spotify.com/artist/5ITDRCbxYSx3gd19M8DFVx'}, 'href': 'https://api.spotify.com/v1/artists/5ITDRCbxYSx3gd19M8DFVx', 'id': '5ITDRCbxYSx3gd19M8DFVx', 'name': 'The Kids', 'type': 'artist', 'uri': 'spotify:artist:5ITDRCbxYSx3gd19M8DFVx'}] 

All Is Love


https://i.scdn.co/image/ab67616d0000b273ad8debadaca8c52a42578ab7
preview_url:  None


#  Get audio features

In [14]:
# Combine the track IDs of both sources into one flat list
# (album tracks first, then singles; no de-duplication is performed).
album_track_ids = album_tracks_df['Track_ID'].tolist()
single_track_ids = single_tracks_df['Track_ID'].tolist()
tracks = album_track_ids + single_track_ids
print(len(tracks))

18074


In [14]:
# Keys of one audio-features record; used later as the data frame's column order.
song_features = [
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms', 'time_signature',
]

# Fetch the audio features in batches of 100 track IDs per request.
track_features = []
for start in range(0, len(tracks), 100):
    batch_ids = tracks[start:start + 100]
    batch_features = spotify.audio_features(batch_ids)
    # Entries that are null (falsy) are dropped, so the result can be
    # shorter than the list of requested IDs.
    track_features.extend(entry for entry in batch_features if entry)


In [24]:
# Build a data frame from the collected feature records, one column per
# name in song_features.
track_features_df = pd.DataFrame(track_features, columns=song_features)

# Take the 'id' column out and re-insert it as the first column under the
# name 'Track_ID'.
id_col = track_features_df['id']
track_features_df = track_features_df.drop(columns='id')
track_features_df.insert(0, 'Track_ID', id_col)
# NOTE(review): the write below is disabled, yet the following cell reads this
# path, so the file must already exist from an earlier run.
#track_features_df.to_csv("../data/track_features.csv", index = False)

In [38]:
# Load the stored audio features from disk (this replaces any in-memory frame).
track_features_df = pd.read_csv("../data/track_features.csv")

# Show five randomly chosen rows; the selection differs between runs.
random_rows = track_features_df.sample(n=5)
display(random_rows)

print("Number of tracks: ", len(track_features_df))


Unnamed: 0,Track_ID,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature
9719,42BeMThJuzIQe1oQvhDwgl,0.288,0.86,2,-6.311,0,0.0393,0.103,0.000399,0.102,0.66,161.196,audio_features,spotify:track:42BeMThJuzIQe1oQvhDwgl,https://api.spotify.com/v1/tracks/42BeMThJuzIQ...,https://api.spotify.com/v1/audio-analysis/42Be...,201507,4
14140,5FKql7zuVTbdxWoP1eIPb7,0.556,0.565,4,-5.215,1,0.0262,0.551,0.0,0.0995,0.289,113.937,audio_features,spotify:track:5FKql7zuVTbdxWoP1eIPb7,https://api.spotify.com/v1/tracks/5FKql7zuVTbd...,https://api.spotify.com/v1/audio-analysis/5FKq...,184667,4
11932,43r9t5K0YG0xku8hYfiOep,0.858,0.65,11,-5.603,0,0.141,0.0165,0.000464,0.0505,0.716,120.004,audio_features,spotify:track:43r9t5K0YG0xku8hYfiOep,https://api.spotify.com/v1/tracks/43r9t5K0YG0x...,https://api.spotify.com/v1/audio-analysis/43r9...,187400,4
6793,6l9o6SyXq8gK6sfarFnnPQ,0.545,0.942,9,-3.374,0,0.0365,0.0105,3e-06,0.0611,0.565,140.187,audio_features,spotify:track:6l9o6SyXq8gK6sfarFnnPQ,https://api.spotify.com/v1/tracks/6l9o6SyXq8gK...,https://api.spotify.com/v1/audio-analysis/6l9o...,248773,4
17712,4jIQ0wouzKHbi1fYHORIN0,0.591,0.924,5,-4.724,0,0.0465,0.00231,0.00435,0.391,0.275,128.175,audio_features,spotify:track:4jIQ0wouzKHbi1fYHORIN0,https://api.spotify.com/v1/tracks/4jIQ0wouzKHb...,https://api.spotify.com/v1/audio-analysis/4jIQ...,322107,4


Number of tracks:  18049


# Get ISRC, artists, popularity, preview_url (30 sec audio), image_url

In [15]:
# Collect one row per track:
#   [track_id, isrc, name, artists, popularity, preview_url, image_url]
# Tracks are requested in batches of 50 IDs per call.
track_data = []
for start in range(0, len(tracks), 50):
    batch_ids = tracks[start:start + 50]
    data = spotify.tracks(batch_ids)
    for track in data['tracks']:
        # The API can return a null entry for an ID it cannot resolve; skip it,
        # mirroring the null filter applied to the audio features above.
        if not track:
            continue

        track_id = track['id']
        name = track['name']
        popularity = track['popularity']

        # 'external_ids' may be missing or null; fall back to no ISRC.
        external_ids = track.get('external_ids') or {}
        isrc = external_ids.get('isrc')

        artists = [artist['name'] for artist in track['artists']]
        preview_url = track.get('preview_url')

        # An album can have an empty image list; use None instead of raising IndexError.
        album = track.get('album') or {}
        images = album.get('images') or []
        image_url = images[0].get('url') if images else None

        track_data.append([track_id, isrc, name, artists, popularity, preview_url, image_url])

In [18]:
# Column names, in the same order as the values of each row in track_data.
track_data_columns = [
    'Track_ID', 'ISRC', 'Track_Name', 'Artists',
    'popularity', 'preview_url', 'image_url',
]
track_data_df = pd.DataFrame(track_data, columns=track_data_columns)
track_data_df.to_csv("../data/track_isrc.csv", index=False)

# Read the file back so the preview shows what is stored on disk.
# Note: after the CSV round trip the 'Artists' lists are plain strings.
track_data_df = pd.read_csv("../data/track_isrc.csv")
display(track_data_df.sample(n=3))
print("Number of tracks: ", len(track_data_df))

Unnamed: 0,Track_ID,ISRC,Track_Name,Artists,popularity,preview_url,image_url
3349,6hTGAJoUYljMsqOvClRaMJ,USWD11366386,"Heimr Àrnadalr - From ""Frozen""/Score",['Christophe Beck'],25,,https://i.scdn.co/image/ab67616d0000b2731ddef8...
1300,77efP89pjvmI14MBGimCpn,USB440417906,On The Good Ship Lollipop,['Maria Muldaur'],1,,https://i.scdn.co/image/ab67616d0000b273a311a4...
12209,1mngj46jZenK3JkgZriOiV,USVI20301307,Uncool,['Courtney Love'],25,,https://i.scdn.co/image/ab67616d0000b273522c01...


Number of tracks:  18074
