In [1]:
from smoothie.auth.AuthorizationHandler import AuthorizationHandler
from smoothie.data_objects.User import User
from smoothie.data_objects.Song import Song
from smoothie.data_objects.Playlist import Playlist
from smoothie.bullet.SongProfile import SongProfile
import smoothie
import aiohttp
import urllib
import getpass
import spotify
import time
import dataclasses
import pandas as pd

In [19]:
# Reimport any changes made to the project modules without restarting the kernel.
import importlib
importlib.reload(spotify.playlist_apis)
importlib.reload(spotify.track_apis)
importlib.reload(smoothie.data_objects.User)
importlib.reload(smoothie.auth.AuthorizationHandler)

# reload() re-executes the module but does not rebind names that were pulled in
# with `from ... import ...`, so every such name has to be imported again;
# otherwise it keeps pointing at the pre-reload class.
# Objects created before the reload (e.g. an existing `auth_handler` or `me`)
# are still instances of the old classes and must be re-created to pick up changes.
from smoothie.auth.AuthorizationHandler import AuthorizationHandler
from smoothie.data_objects.User import User

In [2]:
# One shared aiohttp session: it is handed to the AuthorizationHandler here and
# passed explicitly to every spotify.* API call in the cells below.
# NOTE(review): the session is never closed in the cells shown — confirm it is
# closed elsewhere or add `await session.close()` at the end of the notebook.
session = aiohttp.ClientSession()
auth_handler = AuthorizationHandler(session)

Since Spotify requires a login page with user interaction, we'll need to perform this first step in a browser page. Copy and paste the result of the next cell into a browser and approve access to your Spotify account.

In [3]:
# Build the Spotify authorization URL piece by piece; the redirect URI is the
# only component that gets URL-encoded.
encoded_redirect_uri = urllib.parse.quote_plus(auth_handler._redirect_uri)
authorize_url = (
    'https://accounts.spotify.com/authorize'
    f'?client_id={auth_handler.spotify_client_id}'
    '&response_type=code'
    f'&redirect_uri={encoded_redirect_uri}'
)
print(authorize_url)

https://accounts.spotify.com/authorize?client_id=c6571c2bc3f444a18ad0308ab4b712f3&response_type=code&redirect_uri=https%3A%2F%2Fgoogle.com


Now, in the resulting `google.com` URL, copy all the text after `code=` and enter it when prompted in the next cell.

In [4]:
# Read the authorization code with getpass so the pasted value is not echoed
# into the notebook output.
code = getpass.getpass(prompt='Code from URL? ')
# Wrap the code in a User; `me` is what the auth handler and the spotify.* API
# calls in the cells below receive.
me = User(code=code)

Code from URL? ········


Get access/refresh tokens for use with future API calls

In [5]:
await auth_handler.request_access_token(me)

{'access_token': 'BQATxcL7NSArNjlJ9Fhj2X7M58wbN-vQQDjQdlbtZV9A3C-RI520iN8S19L93XqiDoGmsYiLbRx87fIp5coAbZfJ3QzYyW-HguGvYauiFAPxuDT99FRWq4exMDoHPs0kybsiFwh4D_9vEFAvHA',
 'token_type': 'Bearer',
 'expires_in': 3600,
 'refresh_token': 'AQCN1GcQbME1oCXoOT_YcZLuNbqOV3wqHtOB9QatnmoTaT2qeOJLK3U0X-00HWfOLBtnUdcZSUUzHnuzjPwSGCwb2BMH_TvwMdwFop5imhuh0CmSNDYTPmdt0VsJOZfL6d4'}

First, use the track data available from the playlist APIs to build a list of tracks with weights calculated from the date each track was added to its playlist. The default behavior is to assign equal weight to all songs, but ideally songs added more recently would count more towards the user's song profile.

In [85]:
await spotify.user_apis.get_profile(session, me)
playlist_list = await spotify.playlist_apis.get_current_users_playlists(session, me)

song_df = pd.DataFrame(columns=[field.name for field in dataclasses.fields(Song)])
song_df.set_index('id')
weight_df = pd.DataFrame(columns=['id', 'weight'])
weight_df.set_index('id')

def weight_from_add_date(ref_time, added_time):
    return 1
    time_diff = ref_time - added_time
    # some kind of exponential drop-off

for playlist in playlist_list:
    track_list = await spotify.playlist_apis.get_playlist(session, me, playlist['id'])
    for track in track_list:
        if not track['track']['id']:
            continue
        try:
            row = song_df.loc[track['track']['id']]
            row.weight += weight_from_add_date(track['added_at'], time.time())
        except KeyError:
            weight_df.loc[track['track']['id']] = {
                'weight': weight_from_add_date(track['added_at'], time.time()),
                'id': track['track']['id']
            }

song_df = song_df.append(weight_df)


song_df



Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence,added_at,weight
25GlFJq5QNAXyVgJvCZ4Mf,,,,,,25GlFJq5QNAXyVgJvCZ4Mf,,,,,,,,,,,,,1
1zlclNkERGFrCxznXOgkD3,,,,,,1zlclNkERGFrCxznXOgkD3,,,,,,,,,,,,,1
5bBUDJUfGcG7eFy3Bf4fXv,,,,,,5bBUDJUfGcG7eFy3Bf4fXv,,,,,,,,,,,,,1
4M2t7bP4Mq87mGMn0PObUX,,,,,,4M2t7bP4Mq87mGMn0PObUX,,,,,,,,,,,,,1
67pIYW0MEQWWkNvuXb70vC,,,,,,67pIYW0MEQWWkNvuXb70vC,,,,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35p6JlvO7rnbC2R3zaXbXu,,,,,,35p6JlvO7rnbC2R3zaXbXu,,,,,,,,,,,,,1
1IsF8TjXjTDd7wZVX258zf,,,,,,1IsF8TjXjTDd7wZVX258zf,,,,,,,,,,,,,1
7jQhyzqfbLjQwEFUrWsaYE,,,,,,7jQhyzqfbLjQwEFUrWsaYE,,,,,,,,,,,,,1
3Plz0TdhMQNOorsOXmKdvR,,,,,,3Plz0TdhMQNOorsOXmKdvR,,,,,,,,,,,,,1


Save the data as needed so that this data doesn't have to be fetched again; the track ids are sufficient to fill in the rest of the information. However, the data will need to be re-queried if the weight function changes.

In [11]:
# Cache the track ids and weights on disk; the index is written as the first
# column of the CSV.
song_df.to_csv('jason_df.csv')

Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence,added_at,weight


Read the data from the saved csv file. This is not needed when running the cells sequentially.

In [6]:
# index_col=0 turns the first CSV column (the index written by to_csv) back
# into the frame's index.
song_df = pd.read_csv('jason_df.csv', index_col=0)
song_df

Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence,added_at,weight
25GlFJq5QNAXyVgJvCZ4Mf,,,,,,25GlFJq5QNAXyVgJvCZ4Mf,,,,,,,,,,,,,1
1zlclNkERGFrCxznXOgkD3,,,,,,1zlclNkERGFrCxznXOgkD3,,,,,,,,,,,,,1
5bBUDJUfGcG7eFy3Bf4fXv,,,,,,5bBUDJUfGcG7eFy3Bf4fXv,,,,,,,,,,,,,1
4M2t7bP4Mq87mGMn0PObUX,,,,,,4M2t7bP4Mq87mGMn0PObUX,,,,,,,,,,,,,1
67pIYW0MEQWWkNvuXb70vC,,,,,,67pIYW0MEQWWkNvuXb70vC,,,,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35p6JlvO7rnbC2R3zaXbXu,,,,,,35p6JlvO7rnbC2R3zaXbXu,,,,,,,,,,,,,1
1IsF8TjXjTDd7wZVX258zf,,,,,,1IsF8TjXjTDd7wZVX258zf,,,,,,,,,,,,,1
7jQhyzqfbLjQwEFUrWsaYE,,,,,,7jQhyzqfbLjQwEFUrWsaYE,,,,,,,,,,,,,1
3Plz0TdhMQNOorsOXmKdvR,,,,,,3Plz0TdhMQNOorsOXmKdvR,,,,,,,,,,,,,1


Spotify exposes an API to get the audio features for up to 100 tracks at once. Read track ids in chunks from the dataframe and then fill in the additional data.

In [12]:
# iterate over chunks of song_df
# for all audio features in the response, update each row in song_df by id
for i in range(int(len(song_df) / 100 + 1)):
    id_list = list(song_df.id.iloc[100*i: min(100*(i+1), len(song_df))])
    audio_features = await spotify.track_apis.get_audio_features_for_several_tracks(session, me, id_list)
    for song in audio_features:
        for feature in song.keys():
            song_df.loc[song['id'], feature] = song[feature]
            
song_df

Unnamed: 0,acousticness,analysis_url,danceability,duration_ms,energy,id,instrumentalness,key,loudness,mode,speechiness,tempo,time_signature,track_href,type,uri,valence,added_at,weight,liveness
25GlFJq5QNAXyVgJvCZ4Mf,0.1730,https://api.spotify.com/v1/audio-analysis/25Gl...,0.685,312360.0,0.54500,25GlFJq5QNAXyVgJvCZ4Mf,0.000014,7.0,-6.189,1.0,0.0610,94.061,4.0,https://api.spotify.com/v1/tracks/25GlFJq5QNAX...,audio_features,spotify:track:25GlFJq5QNAXyVgJvCZ4Mf,0.5060,,1,0.2370
1zlclNkERGFrCxznXOgkD3,0.0484,https://api.spotify.com/v1/audio-analysis/1zlc...,0.703,226187.0,0.75900,1zlclNkERGFrCxznXOgkD3,0.003100,0.0,-6.554,1.0,0.1490,94.009,4.0,https://api.spotify.com/v1/tracks/1zlclNkERGFr...,audio_features,spotify:track:1zlclNkERGFrCxznXOgkD3,0.8630,,1,0.6640
5bBUDJUfGcG7eFy3Bf4fXv,0.2500,https://api.spotify.com/v1/audio-analysis/5bBU...,0.850,235507.0,0.52400,5bBUDJUfGcG7eFy3Bf4fXv,0.000002,11.0,-9.375,0.0,0.2040,120.004,4.0,https://api.spotify.com/v1/tracks/5bBUDJUfGcG7...,audio_features,spotify:track:5bBUDJUfGcG7eFy3Bf4fXv,0.6860,,1,0.1140
4M2t7bP4Mq87mGMn0PObUX,0.0992,https://api.spotify.com/v1/audio-analysis/4M2t...,0.669,208013.0,0.54100,4M2t7bP4Mq87mGMn0PObUX,0.000008,4.0,-11.291,0.0,0.0461,89.083,4.0,https://api.spotify.com/v1/tracks/4M2t7bP4Mq87...,audio_features,spotify:track:4M2t7bP4Mq87mGMn0PObUX,0.6450,,1,0.2540
67pIYW0MEQWWkNvuXb70vC,0.1600,https://api.spotify.com/v1/audio-analysis/67pI...,0.929,194818.0,0.51400,67pIYW0MEQWWkNvuXb70vC,0.000000,5.0,-7.210,1.0,0.1520,140.008,4.0,https://api.spotify.com/v1/tracks/67pIYW0MEQWW...,audio_features,spotify:track:67pIYW0MEQWWkNvuXb70vC,0.6410,,1,0.1120
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35p6JlvO7rnbC2R3zaXbXu,0.9790,https://api.spotify.com/v1/audio-analysis/35p6...,0.306,223000.0,0.13800,35p6JlvO7rnbC2R3zaXbXu,0.868000,10.0,-16.553,1.0,0.0344,82.070,3.0,https://api.spotify.com/v1/tracks/35p6JlvO7rnb...,audio_features,spotify:track:35p6JlvO7rnbC2R3zaXbXu,0.2120,,1,0.1050
1IsF8TjXjTDd7wZVX258zf,0.9450,https://api.spotify.com/v1/audio-analysis/1IsF...,0.193,289067.0,0.29500,1IsF8TjXjTDd7wZVX258zf,0.008380,0.0,-9.688,0.0,0.0345,83.688,4.0,https://api.spotify.com/v1/tracks/1IsF8TjXjTDd...,audio_features,spotify:track:1IsF8TjXjTDd7wZVX258zf,0.1550,,1,0.1060
7jQhyzqfbLjQwEFUrWsaYE,0.9890,https://api.spotify.com/v1/audio-analysis/7jQh...,0.383,400467.0,0.30200,7jQhyzqfbLjQwEFUrWsaYE,0.883000,8.0,-14.755,1.0,0.0329,134.000,5.0,https://api.spotify.com/v1/tracks/7jQhyzqfbLjQ...,audio_features,spotify:track:7jQhyzqfbLjQwEFUrWsaYE,0.2540,,1,0.1310
3Plz0TdhMQNOorsOXmKdvR,0.9950,https://api.spotify.com/v1/audio-analysis/3Plz...,0.287,343013.0,0.00376,3Plz0TdhMQNOorsOXmKdvR,0.915000,8.0,-33.258,1.0,0.0494,169.942,3.0,https://api.spotify.com/v1/tracks/3Plz0TdhMQNO...,audio_features,spotify:track:3Plz0TdhMQNOorsOXmKdvR,0.0692,,1,0.1340


In [13]:
# Overwrite the cached CSV now that the audio-feature columns have been filled in.
song_df.to_csv('jason_df.csv')