# Spotify track features analysis

In [3]:
import os

import pandas as pd
from spotipy import Spotify

## Login and get data

I used the accompanying `login.py` script to generate an access token, which can be supplied directly to the client initializer.

In [13]:
# Read the access token (generated by `login.py`) from the environment rather
# than pasting it into the notebook: a hardcoded token is a secret that leaks
# when the notebook is shared, and it goes stale on a later re-run.
# Raises KeyError if SPOTIFY_ACCESS_TOKEN is not set.
sp = Spotify(auth=os.environ["SPOTIFY_ACCESS_TOKEN"])

Start off by getting my playlists, my saved songs, and my most recent plays.

In [14]:
# Collect every page of the user's playlists, not just the first one: the
# endpoint is paged, so a single call silently truncates long playlist lists.
# `sp.next` follows the response's 'next' link, which is empty on the last page.
playlists_page = sp.current_user_playlists()
playlists = list(playlists_page['items'])
while playlists_page['next']:
    playlists_page = sp.next(playlists_page)
    playlists.extend(playlists_page['items'])

In [15]:
# Keep only the ID and name of each playlist, one record per playlist,
# then build the frame in a single step.
playlist_rows = []
for entry in playlists:
    playlist_rows.append({"ID": entry['id'], "Name": entry['name']})

plists = pd.DataFrame(playlist_rows)
plists.head()

Unnamed: 0,ID,Name
0,7wwwcxcewcYqVu1X9F0B4g,melodic house? (2)
1,05uRh5eogpZNyLGXLzLPrW,buffalo
2,42czoai1Dhdl7iF0JVPSLF,car time
3,4po0sG9mdx1phmlhH8adQL,dark and wavvy
4,21k0bkuYPlawkg6w2429rn,where's jon - part i


In [16]:
# Raw response for the current user's recently played tracks; the following
# cell reads its 'items' list.
recents = sp.current_user_recently_played()

In [17]:
# Flatten each recently played item into one record per track. Multiple
# artists are collapsed into comma-separated strings so every track stays
# on a single row.
played_tracks = (entry['track'] for entry in recents['items'])
_recents = [
    {
        "Artists": ",".join(artist['name'] for artist in track['artists']),
        "Artist IDs": ",".join(artist['id'] for artist in track['artists']),
        "Name": track['name'],
        "Track ID": track['id'],
        "Popularity": track['popularity'],
        "Album ID": track['album']['id'],
    }
    for track in played_tracks
]

recents = pd.DataFrame(_recents)
recents.head()

Unnamed: 0,Artists,Artist IDs,Name,Track ID,Popularity,Album ID
0,LAR,2w3NdJswSn39l3TU9vIRq0,I Miss You,5bCIhRCCwWrnCneW8eQDOu,20,7tea0tzJU7KHFswuoNFK4U
1,Koven,3UCbp6D1lvILlxRJT9LnFa,Final Call,2F1kM1ytnyZ1J0T3Qzf1Xe,37,6einAr3xXFNUMxTxIsL92x
2,BetweenUs,2kHgZ7ViQ4qpvADuz1TrlK,All I See,18weulVGrDukxgOaUlnQJO,40,6E49Bc08zwsBwpujVP9sFJ
3,Nora En Pure,24DO0PijjITGIEWsO8XaPs,Luscious Rain,5Cq74SxpiW6llWC4JfKbe3,55,5X7TUEwe3dnQKeqN3qBQZv
4,"Asaf Avidan,The Mojos,Wankelmut","7t51dSX8ZkKC7VoKRd0lME,6PB8wVL9gRsP8QTt2cRZPF,...",One Day / Reckoning Song (Wankelmut Remix) - C...,36Nedlx9aSx0lHaorvdqBJ,40,6ZtATnsRYU8ZP4JD0dH0fs


In [36]:
# Download all saved tracks, `page_size` items per request.
page_size = 50
saved_songs_res = sp.current_user_saved_tracks(limit=page_size)
saved_tracks = saved_songs_res['items']
total = saved_songs_res['total']  # total number of saved tracks reported by the API
off = page_size  # the first page (offset 0) is already collected
# Stop once the offset reaches `total`. The earlier bound of
# `total + page_size` always issued one extra request past the end of the
# library, which only ever returned an empty page.
while off < total:
    next_res = sp.current_user_saved_tracks(offset=off, limit=page_size)
    saved_tracks.extend(next_res['items'])
    off += page_size

In [39]:
def _saved_track_row(entry):
    """Flatten one saved-track item into a single flat record.

    Artist names and IDs are joined into comma-separated strings; the
    item's own 'added_at' value is carried along with the track fields.
    """
    track = entry['track']
    return {
        "Artists": ",".join(artist['name'] for artist in track['artists']),
        "Artist IDs": ",".join(artist['id'] for artist in track['artists']),
        "Name": track['name'],
        "Track ID": track['id'],
        "Popularity": track['popularity'],
        "Album ID": track['album']['id'],
        "Added at": entry['added_at'],
    }


_saved_tracks = [_saved_track_row(entry) for entry in saved_tracks]

saved_tracks = pd.DataFrame(_saved_tracks)
saved_tracks.head()

Unnamed: 0,Artists,Artist IDs,Name,Track ID,Popularity,Album ID,Added at
0,Grum,3VEqFWRt47xQAZJMBF3duQ,Lose Control,0bqjS54zmDlYanW8mlx28k,54,52beQVaTRkEsdip6N4nWZo,2022-03-05T00:13:06Z
1,KASHIWA Daisuke,5sGsy5o8hBSMmDUFTC5Q2P,april.#02,6P3bha1HLEaiwaDA5SlTRB,19,2Gl1cVFZAs3DX3uvaTHCHG,2022-02-15T04:16:16Z
2,Memtrix,3bffaBH7akOhsSLsVJcJug,All You Are,1Pz2FNGmyvRYnlKd58ANEW,52,4CF7zjsdRMyUjjvQ2cvb6l,2022-01-10T01:03:44Z
3,"Sound Quelle,Matt Fax","5mdTuNl23tON1WlsVbvD18,1XgI1X3xjXCKRP1ZjhqgkV",Sunburst,67b4kyYW9PcSu9ChwywKYe,36,1MYWKh9RzeEcToi4chhpoC,2022-01-09T22:26:30Z
4,"Last Heroes,Satellite Empire","3HHfEn7yPOy3IiHS6CHG97,1DGpuIJ6KAI5bcaFGbJZJs",Take Your Time (feat. Satellite Empire),2piFAX26ski1mMmhWGnZQ8,35,4oQg6odo4r4sOG1GbG5TDk,2022-01-01T23:53:47Z


In [41]:
# Dimensions of the saved-tracks DataFrame as (rows, columns).
saved_tracks.shape

(286, 7)

Now that we have basic track info, we can make more requests for some more granular track features. That's the data we _really_ want.