# Spotify track features analysis

In [63]:
import os

import pandas as pd
import plotly.express as px
from spotipy import Spotify

## Login and get data

I used the accompanying `login.py` script to generate an access token, which can be supplied directly to the client initializer.

In [64]:
# Take the access token from the SPOTIFY_ACCESS_TOKEN environment variable so
# a real token never has to be pasted into (and saved with) the notebook.
# The original literal is kept as the fallback, so nothing changes when the
# variable is not set.
sp = Spotify(auth=os.environ.get("SPOTIFY_ACCESS_TOKEN", "REDACTED"))

Start off by getting my playlists, my saved songs, and my most recent plays.

In [65]:
# Keep only the playlist objects from the response; the rest of the response
# is not used by this notebook.
playlists_response = sp.current_user_playlists()
playlists = playlists_response['items']

In [66]:
# Reduce each playlist object to the two fields of interest, then tabulate.
playlist_rows = []
for playlist in playlists:
    playlist_rows.append({"ID": playlist['id'], "Name": playlist['name']})

plists = pd.DataFrame(playlist_rows)
plists.head()

Unnamed: 0,ID,Name
0,7wwwcxcewcYqVu1X9F0B4g,melodic house? (2)
1,05uRh5eogpZNyLGXLzLPrW,buffalo
2,42czoai1Dhdl7iF0JVPSLF,car time
3,4po0sG9mdx1phmlhH8adQL,dark and wavvy
4,21k0bkuYPlawkg6w2429rn,where's jon - part i


In [67]:
# Raw recently-played response; the page size is spelled out rather than left
# implicit (assumes 50 is the client's default — TODO confirm against the
# installed spotipy version).
recents = sp.current_user_recently_played(limit=50)

In [68]:
# Flatten every item of the recently-played response into one record per track.
_recents = []
for played in recents['items']:
    track = played['track']
    performers = track['artists']
    _recents.append({
        # Tracks with several artists keep all names / IDs, comma-joined in
        # the order they are listed on the track.
        "Artists": ",".join(artist['name'] for artist in performers),
        "Artist IDs": ",".join(artist['id'] for artist in performers),
        "Name": track['name'],
        "Track ID": track['id'],
        "Popularity": track['popularity'],
        "Album ID": track['album']['id'],
    })

# From here on `recents` is the tabular view, no longer the raw response.
recents = pd.DataFrame(_recents)
recents.head()

Unnamed: 0,Artists,Artist IDs,Name,Track ID,Popularity,Album ID
0,"Crystal Skies,RUNN","6HraHl4uwIQFixUu3J49BK,3l0H4QNiYYNdIsnZ4JgJAg",Feels Right (feat. RUNN),2CNDmxxFLmuD3qyiJgJz67,43,7uNRnXAfrV0cGSmMIDgHIn
1,"Sappheiros,Esoreni","5ZVHXQZAIn9WJXvy6qn9K0,6xTK2zxXkCfE7Noni29tm3",Falling,0VlTcVzfjA8y0S9L7XEirk,48,7jlLR0cPb17mnL0FssnQd1
2,"Satellite Empire,TwoWorldsApart,Ento,Matt Rysen","1DGpuIJ6KAI5bcaFGbJZJs,29CTrnvjIYocNxv66Umt5B,...",Let Me Drown - Matt Rysen & Ento Remix,3Kybv5kFg5zj4iqeFO3hL6,25,4piI84gufy8Kxac9ng9xiX
3,Sappheiros,5ZVHXQZAIn9WJXvy6qn9K0,Reaching Out,3A0Z2vab5L5TyAXq0bJlkD,39,78Tt3nTpOYGRWiLdotZ3vC
4,Skrux,6cwRqYSDYwWsLveCcDRhyY,You & Me,1XoMhaVxtcVukieSHOpBtn,52,5XQ1fvzxybJfICorVRyKXI


In [69]:
# Collect every saved track, `page_size` items per request.
page_size = 50
saved_songs_res = sp.current_user_saved_tracks(limit=page_size)
saved_tracks = saved_songs_res['items']
total = saved_songs_res['total']  # item count reported by the first response

# The first page is already in hand, so continue from the second one.
# Looping while `off < total` requests exactly the remaining pages; the
# previous bound (`total + page_size`) always issued one extra request past
# the end, which only ever contributed an empty page.
off = page_size
while off < total:
    next_res = sp.current_user_saved_tracks(offset=off, limit=page_size)
    saved_tracks.extend(next_res['items'])
    off += page_size

In [70]:
# Flatten every saved-track item into one record per track. Unlike the
# recently-played records, these also carry the time the track was saved.
_saved_tracks = []
for saved in saved_tracks:
    track = saved['track']
    performers = track['artists']
    _saved_tracks.append({
        # Tracks with several artists keep all names / IDs, comma-joined in
        # the order they are listed on the track.
        "Artists": ",".join(artist['name'] for artist in performers),
        "Artist IDs": ",".join(artist['id'] for artist in performers),
        "Name": track['name'],
        "Track ID": track['id'],
        "Popularity": track['popularity'],
        "Album ID": track['album']['id'],
        "Added at": saved['added_at'],
    })

# From here on `saved_tracks` is the tabular view, no longer the raw item list.
saved_tracks = pd.DataFrame(_saved_tracks)
saved_tracks.head()

Unnamed: 0,Artists,Artist IDs,Name,Track ID,Popularity,Album ID,Added at
0,Grum,3VEqFWRt47xQAZJMBF3duQ,Lose Control,0bqjS54zmDlYanW8mlx28k,54,52beQVaTRkEsdip6N4nWZo,2022-03-05T00:13:06Z
1,KASHIWA Daisuke,5sGsy5o8hBSMmDUFTC5Q2P,april.#02,6P3bha1HLEaiwaDA5SlTRB,19,2Gl1cVFZAs3DX3uvaTHCHG,2022-02-15T04:16:16Z
2,Memtrix,3bffaBH7akOhsSLsVJcJug,All You Are,1Pz2FNGmyvRYnlKd58ANEW,52,4CF7zjsdRMyUjjvQ2cvb6l,2022-01-10T01:03:44Z
3,"Sound Quelle,Matt Fax","5mdTuNl23tON1WlsVbvD18,1XgI1X3xjXCKRP1ZjhqgkV",Sunburst,67b4kyYW9PcSu9ChwywKYe,36,1MYWKh9RzeEcToi4chhpoC,2022-01-09T22:26:30Z
4,"Last Heroes,Satellite Empire","3HHfEn7yPOy3IiHS6CHG97,1DGpuIJ6KAI5bcaFGbJZJs",Take Your Time (feat. Satellite Empire),2piFAX26ski1mMmhWGnZQ8,35,4oQg6odo4r4sOG1GbG5TDk,2022-01-01T23:53:47Z


In [71]:
# Sanity check: (rows, columns) of the saved-tracks table built above.
saved_tracks.shape

(286, 7)

Now that we have basic track info, we can make more requests for some more granular track features. That's the data we _really_ want.

In [72]:
# Request audio features for the saved tracks in batches of `page_size` IDs.
track_ids = saved_tracks['Track ID'].tolist()
features = list()
page_size = 100
# NOTE(review): the upper bound is len(track_ids) + page_size, so the final
# iteration always passes an empty ID list to the client; whatever that call
# returns is still appended to `features`.
for i in range(0, len(track_ids) + page_size, page_size):
    batch_ids = track_ids[i:i + page_size]
    features.extend(sp.audio_features(batch_ids))



In [73]:
# Columns to keep, in display order; anything else in the feature records is
# discarded.
feature_columns = [
    'id', 'danceability', 'energy', 'key', 'loudness', 'mode',
    'speechiness', 'acousticness', 'instrumentalness', 'liveness',
    'valence', 'tempo', 'duration_ms', 'time_signature',
]
# NOTE(review): `features[:-1]` discards the last collected element —
# presumably the entry produced by the trailing empty request in the fetch
# loop; confirm before changing either cell.
features_df = pd.DataFrame(features[:-1])[feature_columns]
features_df.head()

Unnamed: 0,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,0bqjS54zmDlYanW8mlx28k,0.542,0.857,2,-6.133,0,0.0352,0.00113,0.571,0.0779,0.162,128.011,337500,4
1,6P3bha1HLEaiwaDA5SlTRB,0.387,0.47,0,-8.508,0,0.0476,0.613,0.859,0.133,0.219,119.95,1662987,4
2,1Pz2FNGmyvRYnlKd58ANEW,0.349,0.696,3,-3.274,0,0.0362,0.0137,0.00407,0.116,0.0782,173.901,330862,4
3,67b4kyYW9PcSu9ChwywKYe,0.638,0.823,0,-9.17,1,0.0401,0.0379,0.902,0.187,0.0384,124.017,210968,4
4,2piFAX26ski1mMmhWGnZQ8,0.475,0.44,2,-7.699,1,0.0297,0.126,3e-06,0.107,0.0584,95.043,267790,4


In [74]:
# Same feature lookup for the recently played tracks, sent as a single
# request with every ID in `recents`.
# Note: this rebinds `track_ids` and `features`, which earlier cells used for
# the saved tracks.
track_ids = recents['Track ID']
features = sp.audio_features(track_ids.tolist())
recents_features_raw = pd.DataFrame(features)
recents_features_df = recents_features_raw[[
    'id', 'danceability', 'energy', 'key', 'loudness', 'mode',
    'speechiness', 'acousticness', 'instrumentalness', 'liveness',
    'valence', 'tempo', 'duration_ms', 'time_signature',
]]
recents_features_df.head()

Unnamed: 0,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,2CNDmxxFLmuD3qyiJgJz67,0.474,0.714,5,-3.516,0,0.0343,0.129,0.0,0.147,0.225,139.984,212571,4
1,0VlTcVzfjA8y0S9L7XEirk,0.609,0.389,9,-16.846,0,0.031,0.598,0.893,0.0952,0.0392,125.027,253466,4
2,3Kybv5kFg5zj4iqeFO3hL6,0.274,0.51,5,-4.684,0,0.0354,0.643,0.00072,0.18,0.0398,140.054,342857,4
3,3A0Z2vab5L5TyAXq0bJlkD,0.578,0.437,3,-15.251,0,0.0372,0.399,0.887,0.364,0.0575,103.861,288993,4
4,1XoMhaVxtcVukieSHOpBtn,0.335,0.888,0,-5.395,0,0.0887,0.14,0.604,0.171,0.149,139.77,252590,4


In [75]:
# Stack the saved-track features on top of the recently-played features.
# Row labels are carried over from each source frame unchanged.
feature_frames = [features_df, recents_features_df]
master_features_df = pd.concat(feature_frames)

In [76]:
# The same track id can occur more than once (e.g. in both source frames);
# keep only the first row for each id.
master_features_df = master_features_df.drop_duplicates(subset=['id'], keep='first')
master_features_df.shape

(313, 14)

In [77]:
# Preview the first five rows of the combined, de-duplicated feature table.
master_features_df.head(n=5)

Unnamed: 0,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,0bqjS54zmDlYanW8mlx28k,0.542,0.857,2,-6.133,0,0.0352,0.00113,0.571,0.0779,0.162,128.011,337500,4
1,6P3bha1HLEaiwaDA5SlTRB,0.387,0.47,0,-8.508,0,0.0476,0.613,0.859,0.133,0.219,119.95,1662987,4
2,1Pz2FNGmyvRYnlKd58ANEW,0.349,0.696,3,-3.274,0,0.0362,0.0137,0.00407,0.116,0.0782,173.901,330862,4
3,67b4kyYW9PcSu9ChwywKYe,0.638,0.823,0,-9.17,1,0.0401,0.0379,0.902,0.187,0.0384,124.017,210968,4
4,2piFAX26ski1mMmhWGnZQ8,0.475,0.44,2,-7.699,1,0.0297,0.126,3e-06,0.107,0.0584,95.043,267790,4


In [124]:
# Camera placement: the eye sits at the same distance along all three axes,
# and the view centre is shifted along z by -0.25.
zoom_level = 1.3
camera = {
    "eye": {"x": zoom_level, "y": zoom_level, "z": zoom_level},
    "center_z": -0.25,
}

# One point per track: position from danceability / energy / speechiness,
# colour from valence on a two-stop scale from '#ff0000' to '#00ff00'.
fig = px.scatter_3d(
    master_features_df,
    x='danceability',
    y='energy',
    z='speechiness',
    width=800,
    height=600,
    color='valence',
    color_continuous_scale=['#ff0000', '#00ff00'],
)
fig.update_layout(
    scene_camera=camera,
    title='Speechiness, Energy, and Danceability of saved tracks and most recent 50 tracks',
    # Zero margins everywhere except the top, which leaves room for the title.
    margin={"b": 0, "l": 0, "r": 0, "t": 50},
)
fig.show()