In [1]:
import pandas as pd
import json

import spotipy
import spotipy.util as util 
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.oauth2 as oauth2

# Authorization with Spotify's API

In [2]:
# Space-separated OAuth scopes requested when prompting for the user token.
scope = 'user-library-read user-read-private user-read-playback-state user-modify-playback-state'

# Placeholders: replace these with your own Spotify application settings before
# running. Do not commit real values to a shared notebook.
username = '< your username >'
client_id = '< your client id >'
client_secret = '< your client secret >'
redirect_uri = '< your redirect >'

token = util.prompt_for_user_token(username, scope, client_id, client_secret, redirect_uri)

# prompt_for_user_token returns None when no token could be obtained; fail here
# with a clear message instead of building a client that has no credentials.
if not token:
    raise RuntimeError('Spotify authorization failed: no access token was returned')

# Authenticated client used by every later cell.
sp = spotipy.Spotify(auth=token)

# Profile of the authenticated user; later cells read user['id'].
user = sp.current_user()

# Some utilities to make life easier

In [None]:
def print_json(to_print):
    """Pretty-print a JSON-serializable object to stdout.

    The object is rendered with keys sorted alphabetically and a 4-space
    indent, which makes nested API responses easier to read.
    """
    rendered = json.dumps(to_print, indent=4, sort_keys=True)
    print(rendered)

In [None]:
def get_playlist_tracks(user=None, playlist_id=None, liked=1):
    """Collect metadata and audio features for every track in a playlist.

    Uses the module-level ``sp`` client. The playlist is read page by page and
    the audio features for each page are fetched in a single batched request
    rather than one request per track.

    Parameters
    ----------
    user : str, optional
        Passed through to ``sp.user_playlist_tracks`` as ``user``.
    playlist_id : str, optional
        Passed through to ``sp.user_playlist_tracks`` as ``playlist_id``.
    liked : int, default 1
        Value stored under the ``'liked'`` key of every returned record. Pass
        ``0`` when loading a playlist of disliked tracks.

    Returns
    -------
    list of dict
        One record per track with the keys ``id``, ``name``, ``artist(s)``,
        ``popularity``, ``liked`` followed by the audio-feature keys. Playlist
        items with no track object, no track id, or no audio features are
        skipped, so the list may be shorter than the playlist's reported total.
    """
    # One page of playlist items maps onto one batched audio-features request.
    page_size = 100
    feature_names = (
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'duration_ms', 'time_signature',
    )

    playlist_tracks = []
    # A one-item request is enough to learn the playlist's total length.
    playlist_length = sp.user_playlist_tracks(user=user, playlist_id=playlist_id, limit=1)['total']

    for offset in range(0, playlist_length, page_size):
        batch = sp.user_playlist_tracks(
            user=user, playlist_id=playlist_id, limit=page_size, offset=offset
        )

        # Keep only items that carry a track with an id; without an id there is
        # nothing to request audio features for.
        tracks = [
            item['track'] for item in batch['items']
            if item.get('track') and item['track'].get('id')
        ]
        if not tracks:
            continue

        features_batch = sp.audio_features([track['id'] for track in tracks]) or []

        for track, features in zip(tracks, features_batch):
            # A missing entry means no audio features are available for this track.
            if not features:
                continue

            record = {
                # MetaData
                'id': track['id'],
                'name': track['name'],
                'artist(s)': [artist['name'] for artist in track['artists']],
                'popularity': track['popularity'],
                'liked': liked,
            }
            # Audio Features (same key order as before so column order is stable)
            for name in feature_names:
                record[name] = features[name]

            playlist_tracks.append(record)

    return playlist_tracks

# Gather 'good' and 'bad' songs

#### Use the user's saved tracks as the 'good' playlist

In [None]:
# Build one record per saved track: metadata plus audio features, labelled liked = 1.

# Number of saved tracks requested per page; each page is then resolved with a
# single batched audio-features request instead of one request per track.
SAVED_PAGE_SIZE = 50
SAVED_FEATURE_NAMES = (
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'duration_ms', 'time_signature',
)

saved_tracks = []
skipped_saved_tracks = 0  # items dropped because no id or no audio features were available
# A one-item request is enough to learn how many saved tracks there are.
saved_length = sp.current_user_saved_tracks(limit=1)['total']

for index in range(0, saved_length, SAVED_PAGE_SIZE):
    batch = sp.current_user_saved_tracks(limit=SAVED_PAGE_SIZE, offset=index)

    # Only tracks with an id can be looked up for audio features.
    page_tracks = [
        item['track'] for item in batch['items']
        if item.get('track') and item['track'].get('id')
    ]
    skipped_saved_tracks += len(batch['items']) - len(page_tracks)
    if not page_tracks:
        continue

    page_features = sp.audio_features([track['id'] for track in page_tracks]) or []

    # Count tracks the features response did not cover at all, so the final
    # assertion still balances if the response is shorter than the request.
    skipped_saved_tracks += max(0, len(page_tracks) - len(page_features))

    for track, features in zip(page_tracks, page_features):
        # A missing entry means no audio features are available for this track.
        if not features:
            skipped_saved_tracks += 1
            continue

        t = {
            # MetaData
            'id': track['id'],
            'name': track['name'],
            'artist(s)': [artist['name'] for artist in track['artists']],
            'popularity': track['popularity'],
            'liked': 1,
        }
        # Audio Features (same key order as before so column order is stable)
        for feature_name in SAVED_FEATURE_NAMES:
            t[feature_name] = features[feature_name]

        saved_tracks.append(t)

# Every saved track must be either recorded or explicitly counted as skipped.
assert len(saved_tracks) + skipped_saved_tracks == saved_length, (
    'pagination did not account for every saved track'
)

#### Using a playlist named 'test_ml_bad' as our 'bad' playlist

This playlist needs to be broad, so I included 'hick-hop', Boy Bands, and all 42 Kidz Bop albums

In [None]:
# get_playlist_tracks labels records with liked = 1 by default. These tracks are
# the negative examples, so relabel them as 0 here; otherwise every row in the
# combined dataset carries the same label. dict(track, liked=0) copies each
# record and keeps the original key order.
disliked_tracks = [
    dict(track, liked=0)
    for track in get_playlist_tracks(user=user['id'], playlist_id='6sd1N50ZULzrgoWX0ViDwC')
]

# Sanity check: the 'bad' playlist is expected to contain more than 500 tracks.
assert len(disliked_tracks) > 500

#### Create a DataFrame of these playlists

In [None]:
# Combine liked and disliked records into one list (saved tracks first), then
# build a frame keyed by track id.
all_tracks = saved_tracks + disliked_tracks

df = pd.DataFrame(all_tracks).set_index('id')

In [None]:
# Preview the first five rows of the combined frame.
df.head(n=5)

In [None]:
# Write the combined dataset to tracks.csv in the working directory.
df.to_csv(path_or_buf=r'tracks.csv')