In [3]:
%pip install spotipy
%pip install python-dotenv

import csv
import spotipy
import os
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials

# Load the variables defined in the local 'vars.env' file into the process
# environment. Presumably this is where the SPOTIPY_* credentials read in a
# later cell come from — confirm against the actual file.
load_dotenv('vars.env')

Collecting spotipy
  Downloading spotipy-2.22.0-py3-none-any.whl (28 kB)
Collecting redis>=3.5.3
  Downloading redis-4.4.0-py3-none-any.whl (236 kB)
     |████████████████████████████████| 236 kB 8.8 MB/s            
Collecting async-timeout>=4.0.2
  Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)
Installing collected packages: async-timeout, redis, spotipy
Successfully installed async-timeout-4.0.2 redis-4.4.0 spotipy-2.22.0
Note: you may need to restart the kernel to use updated packages.
Collecting python-dotenv
  Downloading python_dotenv-0.21.0-py3-none-any.whl (18 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-0.21.0
Note: you may need to restart the kernel to use updated packages.


True

In [6]:
# Read the Spotify API client ID and client secret from the environment.
# A missing variable raises KeyError here rather than failing later.
client_id = os.environ['SPOTIPY_CLIENT_ID']
client_secret = os.environ['SPOTIPY_CLIENT_SECRET']

# Build the client-credentials auth manager, then the API client (`sp`)
# that the playlist helpers below use as a module-level global.
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

The playlist helper functions below are adapted from https://towardsdatascience.com/how-to-create-large-music-datasets-using-spotipy-40e7242cc6a6

In [9]:
import pandas as pd

In [20]:
def call_playlist(creator, playlist_id):
    """Build a DataFrame of track metadata and audio features for one playlist.

    Uses the module-level Spotify client ``sp``.

    Parameters
    ----------
    creator : str
        Spotify user name of the playlist owner; passed straight to
        ``sp.user_playlist_tracks``.
    playlist_id : str
        ID of the playlist to fetch.

    Returns
    -------
    pandas.DataFrame
        One row per playlist track, with the columns listed in
        ``playlist_features_list`` (in that order). ``artist`` is the first
        artist credited on the track's album. Playlist entries without a
        track object or track ID are skipped. Tracks for which the API
        returns no audio features keep their metadata and get missing values
        in the audio-feature columns. An empty playlist yields an empty frame
        that still has all columns.
    """
    playlist_features_list = ["artist", "album", "track_name", "track_id", "duration_ms", "danceability", "valence", "energy", "tempo", "loudness", "speechiness", "instrumentalness", "liveness", "acousticness"]
    # Everything after the four metadata columns is copied from the
    # audio-features response.
    audio_feature_names = playlist_features_list[4:]
    # The audio-features endpoint accepts at most 100 track IDs per request.
    batch_size = 100

    # Step 1: collect every page of playlist items. A single response holds
    # only one page, so follow the "next" links until they run out.
    page = sp.user_playlist_tracks(creator, playlist_id)
    playlist_items = []
    while page:
        playlist_items.extend(page["items"])
        page = sp.next(page) if page.get("next") else None

    # Step 2: extract the metadata of every usable track.
    rows = []
    for item in playlist_items:
        track = item.get("track")
        if not track or not track.get("id"):
            # No track object or no ID: audio features cannot be looked up.
            continue
        album = track["album"]
        rows.append({
            "artist": album["artists"][0]["name"],
            "album": album["name"],
            "track_name": track["name"],
            "track_id": track["id"],
        })

    # Step 3: fetch audio features in batches instead of one request per track.
    for start in range(0, len(rows), batch_size):
        batch = rows[start:start + batch_size]
        batch_features = sp.audio_features([row["track_id"] for row in batch])
        for row, audio_features in zip(batch, batch_features):
            for feature in audio_feature_names:
                # The API returns None for tracks it has no analysis for.
                row[feature] = audio_features[feature] if audio_features else None

    # Step 4: build the frame once; growing it with concat in a loop is quadratic.
    return pd.DataFrame(rows, columns=playlist_features_list)

def call_playlist_dict(playlist_dict):
    """Fetch several playlists and stack them into one DataFrame.

    Parameters
    ----------
    playlist_dict : dict
        Maps a playlist label to the positional arguments of
        ``call_playlist``, i.e. ``(creator, playlist_id)``.

    Returns
    -------
    pandas.DataFrame
        The rows of every playlist, in dict order, with a fresh 0..n-1 index
        and an extra ``playlist`` column holding the label each row came
        from. An empty ``playlist_dict`` yields an empty DataFrame.
    """
    frames = []
    for key, val in playlist_dict.items():
        playlist_df = call_playlist(*val)
        # Add a playlist column so that we can see which playlist a track belongs to
        frames.append(playlist_df.assign(playlist=key))

    if not frames:
        # Nothing to concatenate; pd.concat raises on an empty list.
        return pd.DataFrame()

    # Concatenate once at the end instead of re-copying the frame per playlist.
    return pd.concat(frames, ignore_index=True)

To turn this into a classification problem, I use three of my playlists (My Top Songs 2022, 2020 and 2018) and label their tracks with ratings of 1, 0.5 and 0, respectively.

In [23]:
# Top Songs 2022: every track gets the highest rating, 1.
top22 = call_playlist("mlnee", "1LX2YGoVLvzSeoCIloxAfO").assign(rating=1)

In [24]:
# Top Songs 2020: every track gets the middle rating, 0.5.
top20 = call_playlist("mlnee", "37i9dQZF1EMdMQqI7Rqh9N").assign(rating=0.5)

In [26]:
# Top Songs 2018: every track gets the lowest rating, 0.
top18 = call_playlist("mlnee", "37i9dQZF1EjnhmPwMNUqUJ").assign(rating=0)

In [27]:
# Stack the three rated playlists, newest first. Each frame keeps its own
# row labels, so index values repeat across playlists in the combined frame.
top_playlists = pd.concat([top22, top20, top18])

In [28]:
top_playlists

Unnamed: 0,artist,album,track_name,track_id,duration_ms,danceability,valence,energy,tempo,loudness,speechiness,instrumentalness,liveness,acousticness,rating
0,The Weeknd,Dawn FM,Is There Someone Else?,0mL82sxCRjrs3br407IdJh,199111,0.702,0.608,0.586,134.951,-7.83,0.0313,0.000073,0.156,0.0388,1.0
1,The Weeknd,Dawn FM,"Here We Go… Again (feat. Tyler, the Creator)",1NhjYYcYTRywc0di98xHxf,209778,0.469,0.198,0.623,134.943,-4.432,0.0302,0,0.588,0.345,1.0
2,Father John Misty,Pure Comedy,Pure Comedy,0HbFgNSVPMfZLXcmkm5Q42,383800,0.463,0.157,0.262,130.228,-11.655,0.0404,0,0.215,0.816,1.0
3,Rina Sawayama,SAWAYAMA,XS,1TWNKyNQOBfNUkWWs7FooF,201060,0.635,0.59,0.929,117.039,-3.256,0.146,0.000001,0.0822,0.0854,1.0
4,Kate Bush,Hounds of Love (2018 Remaster),Hounds Of Love - 2018 Remaster,7Gmds7ZyvhKdoU0Ho4GIxK,182867,0.543,0.928,0.795,145.523,-12.769,0.0394,0.000003,0.2,0.498,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,The Weeknd,"My Dear Melancholy,",Wasted Times,72TyiiduRmczEbDIEtKQdR,220293,0.562,0.293,0.674,122.699,-6.049,0.0851,0.00002,0.111,0.622,0.0
96,EDEN,End Credits,End Credits,7xtV2OjGY0yvUgBKS0OuVi,240390,0.435,0.314,0.505,171.352,-5.104,0.147,0,0.0859,0.588,0.0
97,Marc E. Bassy,Gossip Columns,Plot Twist,5wJL4o7k8m02m6ZM9KT0ir,227147,0.543,0.706,0.644,97.41,-5.952,0.333,0,0.0733,0.113,0.0
98,August Alsina,Don't Matter,Don't Matter,2zJXJeWnYbOUVTVdRF5EKR,172507,0.668,0.509,0.526,127.97,-7.72,0.165,0,0.0408,0.166,0.0
