<h2> Selecting music from 'Discover Weekly' based on your current playlist </h2>

Install and import spotipy

In [None]:
!pip install spotipy

In [12]:
import spotipy
import spotipy.util as util
import sys
import pandas
import numpy

Some helper functions for getting all the data and in the right format

In [128]:
#Go to https://developer.spotify.com/dashboard/applications/ and get non-commercial license
def get_token(username, scope, client_id, client_secret, redirect_uri):
    token = util.prompt_for_user_token(
        username=username,
        scope=scope,
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri)
    return token, scope

In [129]:
#Change username and playlist if accessing anything other than your own saved songs
def get_track_temp(access_token, spotify, username='None', playlist='None'):
    if access_token[1] == 'user-library-read':
        results = spotify.current_user_saved_tracks()
        track_temp = results['items']
        while results['next']:
            results = spotify.next(results)
            track_temp.extend(results['items'])
    else:
        track_playlist = spotify.user_playlist(username, playlist)
        track_tracks = track_playlist['tracks']
        track_temp = test_tracks['items']
        while track_tracks['next']:
            track_tracks = spotify.next(test_tracks)
            for song in track_tracks['items']:
                track_temp.append(song)
            
    return track_temp

In [None]:
#paginate using 'next' to get more than 20 results
#likeness: only binary input. 1 for like, 0 for dislike
def get_saved_tracks(access_token, spotify, track_temp, likeness='None'):
    
    track = []
    index_names = []
    feature_set = []

    track_temp = track_temp
         
    if access_token[0]:
        for i in track_temp:
            track.append(i['track']['id'])
            index_names.append(i['track']['name'])
    
        for song in track:
            features = spotify.audio_features(song)
            for name in features:
                feature_set.append(name)
        
        feature_data = pandas.DataFrame(feature_set, index=index_names)
        
        if likeness == 1:
            feature_data['likeability'] = 1
        elif likeness == 0:
            feature_data['likeability'] = 0
        else:
            pass
            
        feature_data = feature_data.drop(['type', 'id', 'uri', 'track_href', 'analysis_url'], axis=1)
        
    else:
        return "Error"
    
    return feature_data

<h3> Let's begin </h3>

<b> Add client ID, client secret and redirect uri. This is unique to your app</b>

Get it from Spotify's developer dashboard

In [131]:
access_token = get_token('mchakravarti7',
        'user-library-read',
        '#',
        '#',
        '#')

spotify = spotipy.Spotify(auth=access_token[0])

In [None]:
#creating a dataset of liked tracks
liked_tracks = get_track_temp(access_token, spotify)
dataset_pt1 = get_saved_tracks(access_token, spotify, liked_tracks, 1)

In [None]:
#creating a dataset of disliked tracks
disliked_tracks = get_track_temp(access_token, spotify, username='mchakravarti7', playlist='6c4QWRle2qaQM542rtNKK3?si=oVPi36zLTymaIJ03SAjazg')
dataset_pt2 = get_saved_tracks(access_token, spotify, disliked_tracks, 0)

In [None]:
#concat them to create the training dataset
sets = [dataset_pt1, dataset_pt2]
dataset = pandas.concat(sets)

<h3> Building models </h3>

In [176]:
#creating the test set - input a different playlist
test_tracks = get_track_temp(access_token, spotify, username='majumdar22', playlist='6uC23arPZUdWxER9KvfN73?si=rwHdfi8zTkSZiEXOTK3Ddw')
test = get_saved_tracks(access_token, spotify, test_tracks)

IMPORTANT - Remove likeability column

In [None]:
#TODO: Remove this hack
test = test.drop(['likeability'], axis=1)

Define training and testing data

In [185]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(dataset, test_size = 0.3)
x_train = train.iloc[0:,0:13]
y_train = train.iloc[0:, 13]
x_test = test.iloc[0:,0:13]
y_test = test.iloc[0:, 13]

<b>Trying Random Forests</b>

In [196]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier

parameters = {
     'n_estimators':(50, 100, 150, 250), 
     'max_samples':(.30,.40,.50,1.0),
     'max_features':(10,11,9,13),
     
}

model = GridSearchCV(BaggingClassifier(),parameters,cv=3,iid=False)
inputs = dataset.iloc[0:, 0:13]
outputs = dataset.iloc[0:, 13]
model.fit(inputs, np.ravel(outputs))
model.best_score_, model.best_params_

(0.5, {'max_features': 10, 'max_samples': 0.3, 'n_estimators': 50})

In [206]:
rf = BaggingClassifier(max_features=10,max_samples=0.3,n_estimators=50)
rf.fit(x_train,y_train)
predictions = rf.predict(test)

In [201]:
predictions

array([0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0,
       0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0,
       1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1,

<b>Trying k-Nearest Neighbours</b>

In [207]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
param_grid = {'n_neighbors': np.arange(1, 10)}
model = GridSearchCV(knn, param_grid, cv=10)
inputs = dataset.iloc[0:, 0:13]
outputs = dataset.iloc[0:, 13]
model.fit(inputs, np.ravel(outputs))
model.best_score_, model.best_params_

(0.5, {'n_neighbors': 1})

In [209]:
real_knn = KNeighborsClassifier(n_neighbors=1)
real_knn.fit(x_train,y_train)
r_knn = real_knn.score(x_test,y_test)
print(r_knn)

0.17353951890034364


In [210]:
predictions = real_knn.predict(test)

In [211]:
predictions

array([0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1,
       1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,

<h3> Adding songs to playlist based on predictions </h3>