In [1]:
import pandas as pd
import json

import spotipy
import spotipy.util as util 
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.oauth2 as oauth2

# Authorization with Spotify's API

In [2]:
# Permissions requested for the user token, written one scope per line; the
# adjacent literals concatenate into a single space-separated string.
scope = (
    'user-library-read '
    'user-read-private '
    'user-read-playback-state '
    'user-modify-playback-state'
)

# Account and app credentials are empty-string placeholders in this notebook.
username = ''
client_id = ''
client_secret = ''
redirect_uri = 'http://google.com/'

# Obtain a user token and build the client (`sp`) that the later cells use.
token = util.prompt_for_user_token(
    username=username,
    scope=scope,
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=redirect_uri,
)
sp = spotipy.Spotify(auth=token)

# Profile of the authenticated user; its 'id' is read when fetching playlists.
user = sp.current_user()

# Some utilities to make life easier

In [3]:
def print_json(to_print):
    """Print ``to_print`` as JSON with sorted keys and 4-space indentation."""
    rendered = json.dumps(to_print, indent=4, sort_keys=True)
    print(rendered)

In [4]:
def get_playlist_tracks(user=None, playlist_id=None, liked=None):
    """Collect metadata and audio features for the tracks of a playlist.

    Pages through ``sp.user_playlist_tracks`` (using the notebook-level ``sp``
    client) and fetches the audio features of each page with a single
    ``sp.audio_features`` call rather than one request per track.

    Args:
        user: Forwarded as the ``user`` argument of ``sp.user_playlist_tracks``.
        playlist_id: Forwarded as the ``playlist_id`` argument.
        liked: Value stored under the ``'liked'`` key of every returned record.

    Returns:
        A list of dicts, one per usable track, holding the metadata keys
        (``id``, ``name``, ``artist(s)``, ``artist_id(s)``, ``popularity``,
        ``liked``) followed by the audio-feature keys. Playlist entries that
        have no track object, no track id, or no audio features are skipped
        instead of raising.
    """
    # Used both as the playlist page size and as the audio-features batch
    # size; 100 is the step the loop has always advanced by.
    page_size = 100
    feature_keys = (
        'danceability',
        'energy',
        'key',
        'loudness',
        'mode',
        'speechiness',
        'acousticness',
        'instrumentalness',
        'liveness',
        'valence',
        'tempo',
        'duration_ms',
        'time_signature',
    )

    playlist_tracks = []
    index = 0
    playlist_length = None  # learned from the first page's 'total'

    while playlist_length is None or index < playlist_length:
        batch = sp.user_playlist_tracks(
            user=user, playlist_id=playlist_id, limit=page_size, offset=index
        )
        playlist_length = batch['total']
        items = batch['items']
        if not items:
            # Nothing came back for this offset; stop rather than loop forever.
            break

        # Entries without a track object or id cannot be looked up, so drop them.
        tracks = [
            item['track']
            for item in items
            if item.get('track') and item['track'].get('id')
        ]
        features_batch = []
        if tracks:
            features_batch = sp.audio_features([track['id'] for track in tracks]) or []

        for track, features in zip(tracks, features_batch):
            if not features:
                # No audio features for this track: skip it.
                continue

            record = {
                # MetaData
                'id': track['id'],
                'name': track['name'],
                'artist(s)': [artist['name'] for artist in track['artists']],
                'artist_id(s)': [artist['id'] for artist in track['artists']],
                'popularity': track['popularity'],
                'liked': liked,
            }
            # Audio Features, kept in a fixed order so DataFrame columns are stable.
            for key in feature_keys:
                record[key] = features[key]

            playlist_tracks.append(record)

        index += page_size

    return playlist_tracks

# Gather 'good' and 'bad' songs

#### Use the user's saved tracks as the 'good' playlist

In [5]:
# Page through the current user's saved tracks and label every one liked=1.
# Audio features are requested once per page instead of once per track.
saved_page_size = 50
saved_feature_keys = (
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'duration_ms',
    'time_signature',
)

index = 0
saved_tracks = []
saved_skipped = 0  # saved tracks for which no audio features came back
saved_length = sp.current_user_saved_tracks(limit=1)['total']

while index < saved_length:
    batch = sp.current_user_saved_tracks(limit=saved_page_size, offset=index)
    items = batch['items']
    if not items:
        # Nothing came back for this offset; the length check below will flag it.
        break

    features_batch = sp.audio_features([item['track']['id'] for item in items]) or []

    for item, features in zip(items, features_batch):
        if not features:
            # No audio features for this track: count it and move on rather
            # than failing on the missing data.
            saved_skipped += 1
            continue

        track = item['track']
        t = {
            # MetaData
            'id': track['id'],
            'name': track['name'],
            'artist(s)': [artist['name'] for artist in track['artists']],
            'artist_id(s)': [artist['id'] for artist in track['artists']],
            'popularity': track['popularity'],
            'liked': 1,
        }
        # Audio Features, in a fixed order so DataFrame columns are stable.
        for key in saved_feature_keys:
            t[key] = features[key]

        saved_tracks.append(t)

    index += saved_page_size

# Every saved track must be either collected or explicitly counted as skipped.
assert len(saved_tracks) + saved_skipped == saved_length, (
    'collected %d and skipped %d of %d saved tracks'
    % (len(saved_tracks), saved_skipped, saved_length)
)

#### Using a playlist named 'test_ml_bad' as our 'bad' playlist

This playlist needs to be broad, so I included 'hick-hop', Boy Bands, and all 42 Kidz Bop albums

In [6]:
# Fetch the 'bad' playlist and label every one of its tracks liked=0.
disliked_tracks = get_playlist_tracks(
    user=user['id'],
    playlist_id='6sd1N50ZULzrgoWX0ViDwC',
    liked=0,
)

# Sanity check: the playlist is expected to contribute more than 500 tracks.
assert 500 < len(disliked_tracks)

#### Create a DataFrame of these playlists

In [7]:
# Liked tracks first, then disliked ones, combined into a fresh list.
all_tracks = [*saved_tracks, *disliked_tracks]

# One row per track record, indexed by the track's 'id' value.
df = pd.DataFrame(all_tracks).set_index('id')

In [8]:
# Show the first rows of the combined frame as a quick visual check.
df.head()

Unnamed: 0_level_0,name,artist(s),artist_id(s),popularity,liked,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2Fs18NaCDuluPG1DHGw1XG,Life is a Highway,[Rascal Flatts],[0a1gHP0HAqALbEyxaD5Ngn],69,1,0.582,0.952,5,-3.848,1,0.0731,0.00324,0.0,0.223,0.655,103.027,276320,4
42zd6DYQ4o4SECmTITrM1U,Dreams and Nightmares,[Meek Mill],[20sxb77xiYeusSH8cVdatc],68,1,0.631,0.774,11,-2.459,1,0.3,0.468,0.0,0.281,0.44,79.745,230693,4
5csdNgCD64XzhsyoRlhzsa,LEGENDARY (feat. J. Cole),"[Joey Bada$$, J. Cole]","[2P5sC9cVZDToPxyomzF1UH, 6l3HvQ5sa6mXTsMTB19rO5]",53,1,0.722,0.654,11,-5.419,0,0.173,0.198,6e-06,0.0891,0.418,81.984,278112,4
3I9zZfbeaRo9ks1MB7zhmR,"RING THE ALARM (feat. Nyck Caution, Kirk Knigh...","[Joey Bada$$, Nyck Caution, Kirk Knight, Meech...","[2P5sC9cVZDToPxyomzF1UH, 1ZyUXvaBXUsIZ3JD6jYGk...",49,1,0.51,0.907,2,-4.239,1,0.441,0.244,0.0,0.133,0.521,93.442,260122,4
7rBbiuu7s7T0iUTImg4zAc,Be Somebody - Remix,"[Clams Casino, A$AP Rocky, AJ Tracey, Lil B]","[5vSQUyT33qxr1xAX2Tkf3A, 13ubrt8QOOCPljQ2FL1Kc...",33,1,0.592,0.631,1,-6.841,1,0.0898,0.0533,0.0,0.086,0.151,91.893,233773,4


In [9]:
# Save the combined frame to the relative path 'tracks.csv'.
df.to_csv(r'tracks.csv')