# Spotify track features analysis

In [None]:
import os

import pandas as pd
import plotly.express as px
from spotipy import Spotify

## Login and get data

I used the accompanying `login.py` script to generate an access token, which can be supplied directly to the client initializer.

In [None]:
# Prefer a token taken from the SPOTIFY_ACCESS_TOKEN environment variable so the
# secret never has to be saved inside the notebook. The inline placeholder is
# only a fallback and must be replaced with a real token to authenticate.
sp = Spotify(auth=os.environ.get("SPOTIFY_ACCESS_TOKEN", "REDACTED"))

Start off by getting my playlists, my saved songs, and my most recent plays.

In [None]:
# current_user_playlists() returns a single page of results, so keep following
# the response's `next` link until every playlist has been collected.
playlists_page = sp.current_user_playlists()
playlists = list(playlists_page['items'])
while playlists_page['next']:
    playlists_page = sp.next(playlists_page)
    playlists.extend(playlists_page['items'])

In [None]:
# One row per playlist, keeping only its ID and display name.
plists = pd.DataFrame(
    [{"ID": playlist['id'], "Name": playlist['name']} for playlist in playlists]
)
plists.head()

In [None]:
# Fetch the current user's play history with the client's default arguments.
# The raw response is kept as-is; its 'items' list is flattened in the next cell.
recents = sp.current_user_recently_played()

In [None]:
# Flatten every play-history entry into one row of track metadata.
# NOTE: `recents` enters this cell as the raw API response and leaves it as a
# DataFrame, so the fetch cell has to be re-run before this cell can run again.
_recents = [
    {
        "Artists": ", ".join(artist['name'] for artist in track['artists']),
        "Artist IDs": ",".join(artist['id'] for artist in track['artists']),
        "Name": track['name'],
        "Track ID": track['id'],
        "Popularity": track['popularity'],
        "Album ID": track['album']['id'],
    }
    for track in (played['track'] for played in recents['items'])
]

recents = pd.DataFrame(_recents)
recents.head()

In [None]:
# Saved tracks are fetched in pages of `page_size` items.
page_size = 50
saved_songs_res = sp.current_user_saved_tracks(limit=page_size)
saved_tracks = saved_songs_res['items']
total = saved_songs_res['total']
# The first request already covered offsets [0, page_size). Request the
# remaining pages and stop at `total`, so no request is issued past the end of
# the library.
for off in range(page_size, total, page_size):
    next_res = sp.current_user_saved_tracks(offset=off, limit=page_size)
    saved_tracks.extend(next_res['items'])

In [None]:
# Flatten every saved-track entry into one row of track metadata.
# NOTE: `saved_tracks` enters this cell as the list of raw API items and leaves
# it as a DataFrame, so the fetch cell has to be re-run before running this again.
_saved_tracks = [
    {
        "Artists": ", ".join(performer['name'] for performer in song['artists']),
        "Artist IDs": ",".join(performer['id'] for performer in song['artists']),
        "Name": song['name'],
        "Track ID": song['id'],
        "Popularity": song['popularity'],
        "Album ID": song['album']['id'],
    }
    for song in (entry['track'] for entry in saved_tracks)
]

saved_tracks = pd.DataFrame(_saved_tracks)
saved_tracks.head()

In [None]:
# Size check: (rows, columns) of the saved-tracks table.
saved_tracks.shape

Now that we have basic track info, we can make more requests for some more granular track features. That's the data we _really_ want.

In [None]:
track_ids = saved_tracks['Track ID'].tolist()
features = list()
page_size = 100
i = 0
while i < len(track_ids) + page_size:
    _features = sp.audio_features(track_ids[i:i + page_size])
    features.extend(_features)
    i += page_size


In [None]:
features_df = pd.DataFrame(features[:-1])
features_df = features_df[['id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']]
features_df.head()

In [None]:
# Audio features for the recently played tracks, fetched in a single request.
track_ids = recents['Track ID']
features = sp.audio_features(track_ids.tolist())
# Keep only real feature dicts: a None entry (a track with no features returned)
# would make pd.DataFrame raise. `columns` selects and orders the columns.
recents_features_df = pd.DataFrame(
    [f for f in features if f is not None],
    columns=['id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature'],
)
recents_features_df.head()

In [None]:
# Stack the saved-track and recently-played feature tables into a single frame.
# Tracks present in both tables appear twice at this point.
master_features_df = pd.concat([features_df, recents_features_df])

In [None]:
# Keep only the first row seen for each track id, then report (rows, columns).
master_features_df = master_features_df.drop_duplicates(subset=['id'], keep='first')
master_features_df.shape

In [None]:
# A track can be present in both tables (or several times in the play history).
# The right join below copies every such repeat into master_df, so reduce the
# human-readable info to one row per track before merging.
human_song_info = pd.concat([recents, saved_tracks]).drop_duplicates(subset='Track ID')
# Right join: keep every known track, attaching audio features where available.
master_df = master_features_df.merge(right=human_song_info, how='right', left_on='id', right_on='Track ID')
# Label shown when hovering over a point in the plot: "<artists> - <track name>".
master_df['hoverdata'] = master_df['Artists'] + " - " + master_df["Name"]
master_df.head()

In [None]:
# Camera: eye placed at the same distance along all three axes, with the
# centre's z component set to -0.25.
zoom_level = 1.3
camera = dict(
    eye=dict(x=zoom_level, y=zoom_level, z=zoom_level),
    center_z=-0.25,
)

# 3-D scatter of danceability / energy / speechiness, coloured by valence on a
# red-to-blue scale, with "<artists> - <name>" as the hover label.
fig = px.scatter_3d(
    master_df,
    x='danceability',
    y='energy',
    z='speechiness',
    width=800,
    height=600,
    color='valence',
    color_continuous_scale=['#ff0000', '#0000ff'],
    hover_name='hoverdata',
)
fig.update_layout(
    scene_camera=camera,
    title='Speechiness, Energy, and Danceability of saved tracks and most recent 50 tracks',
    margin=dict(b=0, l=0, r=0, t=50),
)
fig.show()