In [14]:
from pathlib import Path
from IPython.display import display

import numpy as np
import pandas as pd

import spotipy
import lib_spotify_app.api_adapter as api_adapter

# Show every column when displaying wide DataFrames.
# The fully-qualified option name is required: the short form 'max_columns' is
# treated as a pattern and is ambiguous on recent pandas versions (it also
# matches 'styler.render.max_columns'), which raises an OptionError.
pd.set_option('display.max_columns', None)

In [15]:
# Location of the Spotify credential file, relative to the working directory.
credential_fp = Path('private') / 'spotify_credential.json'

I download all the saved/liked tracks from my Spotify account and their audio features:
https://developer.spotify.com/documentation/web-api/reference/tracks/get-several-audio-features/

In [16]:
# Build the Spotify client through the project's adapter, handing it the
# credential file, the requested OAuth scopes and the cache location.
sp = api_adapter.setup_spotipy(
    credential_fp,
    cache_path=Path(r'private'),
    scope=['user-library-read', 'user-top-read'],
)

# Query the liked songs, then enrich them with audio features looked up
# through the "track.id" column.
df = api_adapter.enrich_audiofeature(
    api_adapter.query_liked_songs(sp),
    sp,
    col="track.id",
)

In [17]:
# Discard the 'index' column and parse 'added_at' into pandas datetimes
# so it can be used for time-based analysis.
df = (
    df.drop(columns='index')
      .assign(added_at=lambda frame: pd.to_datetime(frame['added_at']))
)

The data records when I saved/liked each song, so I would like to find out when I was the most active:

Clean up the columns for analysis: the names are too complex, and I will concatenate the "artists" columns into a list column.

In [18]:
# Strip the leading "track." prefix (and an optional following "id.") from the
# column names.
# - The pattern is a raw string so that `\.` reaches the regex engine unchanged
#   instead of being an invalid string escape.
# - `regex=True` must be explicit: since pandas 2.0 `str.replace` treats the
#   pattern as a literal string by default, which would leave every name as-is.
df.columns = df.columns.str.replace(r'^(track\.)(id\.)?', '', regex=True)

# Column names that collide after the rename are dropped, keeping only the
# first occurrence of each name.
df = df.loc[:, ~df.columns.duplicated()]

# Display the resulting column names.
df.columns.values

array(['added_at', 'album.album_type',
       'album.artists.0.external_urls.spotify', 'album.artists.0.href',
       'album.artists.0.id', 'album.artists.0.name',
       'album.artists.0.type', 'album.artists.0.uri',
       'album.available_markets', 'album.external_urls.spotify',
       'album.href', 'album.id', 'album.images.0.height',
       'album.images.0.url', 'album.images.0.width',
       'album.images.1.height', 'album.images.1.url',
       'album.images.1.width', 'album.images.2.height',
       'album.images.2.url', 'album.images.2.width', 'album.name',
       'album.release_date', 'album.release_date_precision',
       'album.total_tracks', 'album.type', 'album.uri',
       'artists.0.external_urls.spotify', 'artists.0.href',
       'artists.0.id', 'artists.0.name', 'artists.0.type',
       'artists.0.uri', 'available_markets', 'disc_number', 'duration_ms',
       'explicit', 'external_ids.isrc', 'external_urls.spotify', 'href',
       'id', 'is_local', 'name', 'popularity'

Concatenate the artists' values into one list column each for:
* names
* IDs

In [19]:
# Collect the per-artist columns (artists.<n>.name / artists.<n>.id) into one
# list per row, skipping missing values. The patterns are raw strings so that
# `\.` and `\d` are passed to the regex engine rather than being parsed as
# (invalid) string escape sequences.
df['artists.name'] = (
    df.filter(regex=r'^artists\.\d+\.name')
      .apply(lambda row: row.dropna().to_list(), axis=1)
)
df['artists.id'] = (
    df.filter(regex=r'^artists\.\d+\.id')
      .apply(lambda row: row.dropna().to_list(), axis=1)
)

# Track duration converted from milliseconds to minutes (60 000 ms per minute).
df['duration_min'] = df['duration_ms'] / 60000

In [20]:
# Persist the cleaned DataFrame as CSV inside the 'private' folder.
df.to_csv(Path('private') / 'data.csv')

We can use the LastFM app to enrich the song features with the number of times the user has listened to each song.

In [22]:
# Create the LastFM client from the credential file in the 'private' folder.
last_api = api_adapter.setup_lastfm(Path('private') / 'lastfm_credential.json')

# Request the user's top tracks over the 'overall' period (limit of 500)
# and load the response into a DataFrame.
top_tracks_lastfm = pd.DataFrame(
    last_api.user.get_top_tracks(limit=500, period='overall')
)

display(top_tracks_lastfm)

Unnamed: 0,item,weight
0,The Garden - Call The Dogs Out,84
1,the spirit of the beehive - hypnic jerks,75
2,liily - Toro,74
3,Violent Soho - Covered in Chrome,74
4,Beach Bums - Keepaneyeout,73
...,...,...
495,Led Zeppelin - When the Levee Breaks,13
496,liily - Sold,13
497,Magic Potion - Rest Yr Skull,13
498,Metric - Help I'm Alive,13


The issue is that LastFM and Spotify do not share the same song IDs or data model.