# Spotify track features analysis

In [2]:
from spotipy import Spotify
import pandas as pd
import plotly.express as px

## Login and get data

I used the accompanying `login.py` script to generate an access token, which can be supplied directly as to the client initializer.

In [3]:
sp = Spotify(auth="REDACTED")

Start off by getting my playlists, my saved songs, and my most recent plays.

In [4]:
playlists = sp.current_user_playlists()['items']

In [5]:
plists = [{"ID": x['id'], "Name": x['name']} for x in playlists]
plists = pd.DataFrame(plists)
plists.head()

Unnamed: 0,ID,Name
0,7wwwcxcewcYqVu1X9F0B4g,melodic house? (2)
1,05uRh5eogpZNyLGXLzLPrW,buffalo
2,42czoai1Dhdl7iF0JVPSLF,car time
3,4po0sG9mdx1phmlhH8adQL,dark and wavvy
4,21k0bkuYPlawkg6w2429rn,where's jon - part i


In [6]:
recents = sp.current_user_recently_played()

In [7]:
_recents = list()
for i in recents['items']:
    t = i['track']
    artists = ",".join(x['name'] for x in t['artists'])
    artist_ids = ",".join(x['id'] for x in t['artists'])
    name = t['name']
    track_id = t['id']
    popularity = t['popularity']
    album_id = t['album']['id']
    _recents.append({
        "Artists": artists,
        "Artist IDs": artist_ids,
        "Name": name,
        "Track ID": track_id,
        "Popularity": popularity,
        "Album ID": album_id
    })

recents = pd.DataFrame(_recents)
recents.head()

Unnamed: 0,Artists,Artist IDs,Name,Track ID,Popularity,Album ID
0,"Satellite Empire,Last Heroes","1DGpuIJ6KAI5bcaFGbJZJs,3HHfEn7yPOy3IiHS6CHG97",Thrones - Last Heroes Remix,7EwNGyM05261K8rxXvtSg4,43,1CNez1QRKO31AHBu1UEp44
1,"Crystal Skies,Derek Joel,Satellite Empire,Ben ...","6HraHl4uwIQFixUu3J49BK,46OtpY1n8PieZ55v1gujZJ,...",The Chasm - Ben Walter and Satellite Empire Remix,0M0NxwnVNQfLVclV852dQ6,37,58flbnRH10XtbMxqkd4s4T
2,"Last Heroes,Satellite Empire","3HHfEn7yPOy3IiHS6CHG97,1DGpuIJ6KAI5bcaFGbJZJs",Take Your Time (feat. Satellite Empire),2piFAX26ski1mMmhWGnZQ8,35,4oQg6odo4r4sOG1GbG5TDk
3,"Crystal Skies,Derek Joel,Satellite Empire,Ben ...","6HraHl4uwIQFixUu3J49BK,46OtpY1n8PieZ55v1gujZJ,...",The Chasm - Ben Walter and Satellite Empire Remix,0M0NxwnVNQfLVclV852dQ6,37,58flbnRH10XtbMxqkd4s4T
4,"Last Heroes,Satellite Empire","3HHfEn7yPOy3IiHS6CHG97,1DGpuIJ6KAI5bcaFGbJZJs",Take Your Time (feat. Satellite Empire),2piFAX26ski1mMmhWGnZQ8,35,4oQg6odo4r4sOG1GbG5TDk


In [8]:
page_size = 50
saved_songs_res = sp.current_user_saved_tracks(limit=page_size)
saved_tracks = saved_songs_res['items']
total = saved_songs_res['total']
off = page_size
while off < total + page_size:
    next_res = sp.current_user_saved_tracks(offset=off, limit=page_size)
    saved_tracks.extend(next_res['items'])
    off += page_size

In [9]:
_saved_tracks = list()
for i in saved_tracks:
    t = i['track']
    artists = ",".join(x['name'] for x in t['artists'])
    artist_ids = ",".join(x['id'] for x in t['artists'])
    name = t['name']
    track_id = t['id']
    popularity = t['popularity']
    album_id = t['album']['id']
    added_at = i['added_at']
    _saved_tracks.append({
        "Artists": artists,
        "Artist IDs": artist_ids,
        "Name": name,
        "Track ID": track_id,
        "Popularity": popularity,
        "Album ID": album_id,
        "Added at": added_at 
    })

saved_tracks = pd.DataFrame(_saved_tracks)
saved_tracks.head()

Unnamed: 0,Artists,Artist IDs,Name,Track ID,Popularity,Album ID,Added at
0,Grum,3VEqFWRt47xQAZJMBF3duQ,Lose Control,0bqjS54zmDlYanW8mlx28k,54,52beQVaTRkEsdip6N4nWZo,2022-03-05T00:13:06Z
1,KASHIWA Daisuke,5sGsy5o8hBSMmDUFTC5Q2P,april.#02,6P3bha1HLEaiwaDA5SlTRB,19,2Gl1cVFZAs3DX3uvaTHCHG,2022-02-15T04:16:16Z
2,Memtrix,3bffaBH7akOhsSLsVJcJug,All You Are,1Pz2FNGmyvRYnlKd58ANEW,52,4CF7zjsdRMyUjjvQ2cvb6l,2022-01-10T01:03:44Z
3,"Sound Quelle,Matt Fax","5mdTuNl23tON1WlsVbvD18,1XgI1X3xjXCKRP1ZjhqgkV",Sunburst,67b4kyYW9PcSu9ChwywKYe,36,1MYWKh9RzeEcToi4chhpoC,2022-01-09T22:26:30Z
4,"Last Heroes,Satellite Empire","3HHfEn7yPOy3IiHS6CHG97,1DGpuIJ6KAI5bcaFGbJZJs",Take Your Time (feat. Satellite Empire),2piFAX26ski1mMmhWGnZQ8,35,4oQg6odo4r4sOG1GbG5TDk,2022-01-01T23:53:47Z


In [10]:
saved_tracks.shape

(286, 7)

Now that we have basic track info, we can make more requests for some more granular track features. That's the data we _really_ want.

In [11]:
track_ids = saved_tracks['Track ID'].tolist()
features = list()
page_size = 100
i = 0
while i < len(track_ids) + page_size:
    _features = sp.audio_features(track_ids[i:i + page_size])
    features.extend(_features)
    i += page_size


In [12]:
features_df = pd.DataFrame(features[:-1])
features_df = features_df[['id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']]
features_df.head()

Unnamed: 0,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,liveness.1,valence,tempo,duration_ms,time_signature
0,0bqjS54zmDlYanW8mlx28k,0.542,0.857,2,-6.133,0,0.0352,0.00113,0.571,0.0779,0.0779,0.162,128.011,337500,4
1,6P3bha1HLEaiwaDA5SlTRB,0.387,0.47,0,-8.508,0,0.0476,0.613,0.859,0.133,0.133,0.219,119.95,1662987,4
2,1Pz2FNGmyvRYnlKd58ANEW,0.349,0.696,3,-3.274,0,0.0362,0.0137,0.00407,0.116,0.116,0.0782,173.901,330862,4
3,67b4kyYW9PcSu9ChwywKYe,0.638,0.823,0,-9.17,1,0.0401,0.0379,0.902,0.187,0.187,0.0384,124.017,210968,4
4,2piFAX26ski1mMmhWGnZQ8,0.475,0.44,2,-7.699,1,0.0297,0.126,3e-06,0.107,0.107,0.0584,95.043,267790,4


In [13]:
track_ids = recents['Track ID']
features = sp.audio_features(track_ids.tolist())
recents_features_df = pd.DataFrame(features)[['id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']]
recents_features_df.head()

Unnamed: 0,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,liveness.1,valence,tempo,duration_ms,time_signature
0,7EwNGyM05261K8rxXvtSg4,0.246,0.551,6,-5.771,0,0.0501,0.631,4.8e-05,0.0937,0.0937,0.107,84.662,264000,4
1,0M0NxwnVNQfLVclV852dQ6,0.342,0.798,1,-4.224,1,0.054,0.145,0.0,0.358,0.358,0.253,134.713,286587,4
2,2piFAX26ski1mMmhWGnZQ8,0.475,0.44,2,-7.699,1,0.0297,0.126,3e-06,0.107,0.107,0.0584,95.043,267790,4
3,0M0NxwnVNQfLVclV852dQ6,0.342,0.798,1,-4.224,1,0.054,0.145,0.0,0.358,0.358,0.253,134.713,286587,4
4,2piFAX26ski1mMmhWGnZQ8,0.475,0.44,2,-7.699,1,0.0297,0.126,3e-06,0.107,0.107,0.0584,95.043,267790,4


In [14]:
master_features_df = pd.concat([features_df, recents_features_df])

In [15]:
master_features_df = master_features_df.drop_duplicates(subset='id')
master_features_df.shape

(319, 15)

In [16]:
master_features_df.head()

Unnamed: 0,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,liveness.1,valence,tempo,duration_ms,time_signature
0,0bqjS54zmDlYanW8mlx28k,0.542,0.857,2,-6.133,0,0.0352,0.00113,0.571,0.0779,0.0779,0.162,128.011,337500,4
1,6P3bha1HLEaiwaDA5SlTRB,0.387,0.47,0,-8.508,0,0.0476,0.613,0.859,0.133,0.133,0.219,119.95,1662987,4
2,1Pz2FNGmyvRYnlKd58ANEW,0.349,0.696,3,-3.274,0,0.0362,0.0137,0.00407,0.116,0.116,0.0782,173.901,330862,4
3,67b4kyYW9PcSu9ChwywKYe,0.638,0.823,0,-9.17,1,0.0401,0.0379,0.902,0.187,0.187,0.0384,124.017,210968,4
4,2piFAX26ski1mMmhWGnZQ8,0.475,0.44,2,-7.699,1,0.0297,0.126,3e-06,0.107,0.107,0.0584,95.043,267790,4


In [17]:
fig = px.scatter_3d(master_features_df, x='danceability', y='energy', z='speechiness')
fig.show()