In [20]:
import pandas as pd
import numpy as np
import random as rnd

# Data visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Machine learning libraries
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler, RobustScaler, OneHotEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Deep learning libraries
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout
from scikeras.wrappers import KerasClassifier

# Spotipy
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
from spotipy.oauth2 import SpotifyOAuth
import webbrowser
from json.decoder import JSONDecodeError

In [21]:
# Create App With User Authentication
#
# The client ID and client secret are deliberately NOT written in the notebook:
# when they are omitted, spotipy reads them from the SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET environment variables and raises an OAuth error if
# they are not set. Export both before starting the kernel.
# NOTE(review): the credential pair that used to be hardcoded in this cell has
# been exposed and should be rotated in the Spotify developer dashboard.

sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    redirect_uri="http://localhost:5000/callback",  # must match the app's registered redirect URI
    scope="user-library-read"))                      # read-only access to the saved-tracks library


# List the user's saved tracks as "<index> <first artist> - <title>".
# Only the single page returned by this call is printed; no paging is done.
results = sp.current_user_saved_tracks()
for idx, item in enumerate(results['items']):
    track = item['track']
    first_artist = track['artists'][0]['name']
    title = track['name']
    print(idx, first_artist, "-", title)

0 King Harvest - Dancing in the Moonlight
1 Ashnikko - Daisy
2 Ashnikko - Slumber Party (feat. Princess Nokia)
3 Cuco - Si Me Voy (with The Marías)
4 Hozier - Work Song
5 Jordana - Better in the Dark
6 MUNA - Silk Chiffon
7 WILLIS - I Think I Like When It Rains
8 Ethel Cain - Crush
9 Taylor Swift - seven
10 Lizzy McAlpine - doomsday
11 Bon Iver - For Emma
12 Emotional Oranges - West Coast Love
13 Mitski - Drunk Walk Home
14 Kendrick Lamar - XXX. FEAT. U2.
15 Mac Miller - Self Care
16 Faye Webster - I Know You
17 PARTYNEXTDOOR - Belong to the City
18 Travis Scott - I KNOW ?
19 Nas - Nas Is Like


In [30]:
# Fetch up to 50 browse categories and print each one's position and name.
# `categories` is kept because a later cell indexes into it.
categories = sp.categories(limit=50)
category_items = categories['categories']['items']
for i, cat in enumerate(category_items):
    print(f"{i} {cat['name']}")

0 Top Lists
1 Hip-Hop
2 Pop
3 Country
4 Latin
5 Rock
6 Summer
7 Workout
8 R&B
9 Dance/Electronic
10 Netflix
11 Indie
12 Mood
13 Sleep
14 Christian & Gospel
15 Regional Mexican
16 Wellness
17 Chill
18 EQUAL
19 Gaming
20 Frequency
21 Kids & Family
22 Party
23 Decades
24 Fresh Finds
25 Jazz
26 Focus
27 Romance
28 Folk & Acoustic
29 K-Pop
30 Instrumental
31 Ambient
32 Alternative
33 In the car
34 Classical
35 Soul
36 Spotify Singles
37 Cooking & Dining
38 Punk
39 Pop culture
40 Blues
41 Desi
42 Arab
43 RADAR
44 Student
45 Anime
46 Tastemakers
47 Afro
48 Comedy
49 Metal


In [16]:
# Fetch up to 50 playlists for ONE category - the first entry (index 0) of the
# category listing - and print each playlist's position and name.
cat = categories['categories']['items'][0]
playlists = sp.category_playlists(cat['id'], limit=50)
playlist_items = playlists['playlists']['items']
for i, playlist in enumerate(playlist_items):
    print(f"{i} {playlist['name']}")

0 Today’s Top Hits
1 RapCaviar
2 Rock This
3 mint
4 Hot Country
5 Viva Latino
6 RNB X
7 Top 50 - USA
8 Top 50 - Global
9 Viral 50 - Global
10 Viral 50 - USA
11 New Music Friday


In [24]:
# get song ids from list of playlist ids
#
# For every playlist found above, walk ALL pages of its track listing (one
# response only carries a single page) and collect the track IDs. Entries
# whose 'track' is missing/None, or whose track has no 'id', are skipped so
# that the audio-features lookup never receives a None id.
song_ids = []
for playlist_summary in playlists['playlists']['items']:
    if not playlist_summary:
        continue  # defensive: ignore empty slots in the playlist listing
    page = sp.playlist(playlist_summary['id'])['tracks']
    while page:
        for item in page['items']:
            song = item.get('track') if item else None
            if song and song.get('id'):
                song_ids.append(song['id'])
        page = sp.next(page)  # None once the current page has no 'next' link

song_ids[:5]

['1BxfuPKGuaTgP7aM0Bbdwr',
 '4xhsWYTOGcal8zt0J161CU',
 '2IGMVunIBsBLtEQyoI1Mu7',
 '3rUGC1vUpkDG9CZFHMur1t',
 '4iZ4pt7kvcaH6Yo8UoZ4s2']

In [28]:
# Use song ids to query audio features
BATCH_SIZE = 50  # number of track ids sent per audio_features request

features = []
for i in range(0, len(song_ids), BATCH_SIZE):
    audio_features = sp.audio_features(song_ids[i:i + BATCH_SIZE]) or []
    # The response may hold None in place of a track that has no audio
    # features; keep only real records so every DataFrame row is a feature dict.
    features.extend(track for track in audio_features if track is not None)

# Turn the features into a dataframe
features_df = pd.DataFrame(features)

features_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.552,0.702,9,-5.707,1,0.157,0.117,2.1e-05,0.105,0.564,169.994,audio_features,1BxfuPKGuaTgP7aM0Bbdwr,spotify:track:1BxfuPKGuaTgP7aM0Bbdwr,https://api.spotify.com/v1/tracks/1BxfuPKGuaTg...,https://api.spotify.com/v1/audio-analysis/1Bxf...,178427,4
1,0.943,0.558,2,-4.911,1,0.0568,0.0026,2e-06,0.0937,0.606,104.983,audio_features,4xhsWYTOGcal8zt0J161CU,spotify:track:4xhsWYTOGcal8zt0J161CU,https://api.spotify.com/v1/tracks/4xhsWYTOGcal...,https://api.spotify.com/v1/audio-analysis/4xhs...,138411,4
2,0.868,0.538,5,-8.603,1,0.174,0.269,3e-06,0.0901,0.732,99.968,audio_features,2IGMVunIBsBLtEQyoI1Mu7,spotify:track:2IGMVunIBsBLtEQyoI1Mu7,https://api.spotify.com/v1/tracks/2IGMVunIBsBL...,https://api.spotify.com/v1/audio-analysis/2IGM...,231750,4
3,0.75,0.733,6,-3.18,0,0.0319,0.256,0.0,0.114,0.844,111.018,audio_features,3rUGC1vUpkDG9CZFHMur1t,spotify:track:3rUGC1vUpkDG9CZFHMur1t,https://api.spotify.com/v1/tracks/3rUGC1vUpkDG...,https://api.spotify.com/v1/audio-analysis/3rUG...,131872,1
4,0.559,0.551,5,-7.231,1,0.132,0.141,0.0,0.11,0.392,143.008,audio_features,4iZ4pt7kvcaH6Yo8UoZ4s2,spotify:track:4iZ4pt7kvcaH6Yo8UoZ4s2,https://api.spotify.com/v1/tracks/4iZ4pt7kvcaH...,https://api.spotify.com/v1/audio-analysis/4iZ4...,201800,4


In [32]:
# Get track info from gracenote
# NOTE: Gracenote API has built-in fuzzy matching to artist and track.
from collections import defaultdict

def get_gn_multiple(search, dictionary, item):
    '''
    Helper function to get multiple items within Gracenote record

    Flattens the nested mapping stored under ``search[item]`` into
    ``dictionary``: for every ``key -> entry`` pair found there, the value
    ``entry['TEXT']`` is stored under ``item + '_' + key``.

    Parameters
    ----------
    search : mapping
        Record to read from; ``search[item]`` must itself be a mapping whose
        values are mappings containing a ``'TEXT'`` key.
    dictionary : dict
        Destination, updated in place.
    item : str
        Field name to flatten; also used as the prefix of the generated keys.
        The inner keys are concatenated onto it, so they must be strings.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If ``item`` is absent from ``search`` or an entry has no ``'TEXT'``.
    '''
    # dict.iteritems() only exists in Python 2; .items() is the Python 3 form.
    for key, entry in search[item].items():
        dictionary[item + '_' + key] = entry['TEXT']

def get_gn(artist, track):
    '''
    Look up an artist/track pair on Gracenote and return a flat dict.

    The result holds the track's Gracenote id under 'gnid' plus one
    '<field>_<key>' entry for every value found in the artist fields
    (artist_origin, artist_type, artist_era) and the track fields
    (genre, mood, tempo), in that order.

    NOTE(review): this function depends on the module-level names `pygn`,
    `client_id` and `userid`, none of which are defined in the cells
    visible here - confirm they are set up before calling.
    '''
    gn_info = pygn.search(client_id, userid, artist=artist, track=track)

    gn_dict = defaultdict(list)
    gn_dict['gnid'] = gn_info['track_gnid']

    artist_fields = ('artist_origin', 'artist_type', 'artist_era')
    # can potentially drop 'tempo' since Spotify has already captured this
    track_fields = ('genre', 'mood', 'tempo')

    # artist specific info first, then track specific info
    for field in artist_fields + track_fields:
        get_gn_multiple(gn_info, gn_dict, field)

    # hand back a plain dict rather than the defaultdict
    return dict(gn_dict)


UnboundLocalError: cannot access local variable 'userID' where it is not associated with a value