<h2> Selecting music from 'Discover Weekly' based on your current playlist </h2>

Install and import spotipy

In [None]:
!pip install spotipy

In [None]:
import json
import math
import os
import sys
import urllib.request

import numpy as np
import pandas
import spotipy
import spotipy.util as util

Helper functions for fetching the track data and putting it in the right format

In [None]:
# Register an app at https://developer.spotify.com/dashboard/applications/
# (non-commercial license) to obtain the client id, secret and redirect URI.
def get_token(username, scope, client_id, client_secret, redirect_uri):
    """Prompt for a Spotify user token and return it paired with its scope.

    All five arguments are forwarded unchanged, by keyword, to
    ``util.prompt_for_user_token``.

    Returns:
        A ``(token, scope)`` tuple, where ``scope`` is the value passed in.
    """
    credentials = {
        'username': username,
        'scope': scope,
        'client_id': client_id,
        'client_secret': client_secret,
        'redirect_uri': redirect_uri,
    }
    return util.prompt_for_user_token(**credentials), scope

In [None]:
#Change username and playlist if accessing anything other than your own saved songs
def get_track_temp(access_token, spotify, username='None', playlist='None'):
    """Collect every track item from the saved library or from a playlist.

    Args:
        access_token: ``(token, scope)`` pair; only the scope (index 1) is read.
        spotify: client exposing ``current_user_saved_tracks``,
            ``user_playlist`` and ``next``.
        username: playlist owner. The default ``'None'`` selects the current
            user's saved tracks when the scope grants ``user-library-read``.
        playlist: playlist id; a trailing ``?si=...`` share suffix is removed
            before the id is sent.

    Returns:
        A list with the ``items`` of every result page, in page order.
    """
    # Compare scopes as a set of words so order and extra scopes do not matter.
    granted_scopes = (access_token[1] or '').split()

    if username == 'None' and 'user-library-read' in granted_scopes:
        page = spotify.current_user_saved_tracks()
    else:
        # Ids copied from a share link carry a "?si=..." query that is not
        # part of the playlist id.
        if isinstance(playlist, str):
            playlist = playlist.split('?', 1)[0]
        page = spotify.user_playlist(username, playlist)['tracks']

    # Both sources page the same way: follow 'next' until it is empty.
    track_temp = list(page['items'])
    while page['next']:
        page = spotify.next(page)
        track_temp.extend(page['items'])

    return track_temp

In [None]:
#likeness: only binary input. 1 for like, 0 for dislike
def get_saved_tracks(access_token, spotify, track_temp, likeness='None'):
    """Build a DataFrame of audio features for the given track items.

    Args:
        access_token: ``(token, scope)`` pair; a falsy token returns ``"Error"``.
        spotify: client exposing ``audio_features(ids)``.
        track_temp: iterable of items shaped like ``{'track': {'id': ...}}``.
        likeness: ``1`` or ``0`` adds a constant ``'likeability'`` column with
            that value; any other value adds no column.

    Returns:
        A ``pandas.DataFrame`` with one row per track, in input order, without
        the ``type``, ``id``, ``uri``, ``track_href`` and ``analysis_url``
        columns. Returns the string ``"Error"`` when ``access_token[0]`` is
        falsy (kept so existing callers see the same value).
    """
    if not access_token[0]:
        return "Error"

    track_ids = [item['track']['id'] for item in track_temp]

    # Request features in batches of up to 100 ids instead of one call per track.
    batch_size = 100
    feature_set = []
    for start in range(0, len(track_ids), batch_size):
        feature_set.extend(spotify.audio_features(track_ids[start:start + batch_size]))

    feature_data = pandas.DataFrame(feature_set)

    if likeness in (0, 1):
        feature_data['likeability'] = int(likeness)

    # errors='ignore' lets an empty track list yield an empty frame instead of
    # raising KeyError on the missing columns.
    return feature_data.drop(
        ['type', 'id', 'uri', 'track_href', 'analysis_url'],
        axis=1,
        errors='ignore',
    )

<h3> Get token and begin creating dataset </h3>

In [None]:
# get_token needs all five arguments. Credentials are read from the
# environment so they are never stored in the notebook.
# The scope must include 'user-library-read' for get_track_temp to read saved tracks.
access_token = get_token(
    username=os.environ['SPOTIFY_USERNAME'],
    scope='user-library-read user-library-modify',
    client_id=os.environ['SPOTIPY_CLIENT_ID'],
    client_secret=os.environ['SPOTIPY_CLIENT_SECRET'],
    redirect_uri=os.environ['SPOTIPY_REDIRECT_URI'],
)

# access_token is a (token, scope) pair; the client only needs the token
spotify = spotipy.Spotify(auth=access_token[0])

In [None]:
# Training data, part 1: tracks fetched with the default arguments, labelled as liked (1)
liked_tracks = get_track_temp(access_token, spotify)
dataset_pt1 = get_saved_tracks(access_token, spotify, liked_tracks, likeness=1)

In [None]:
# Training data, part 2: tracks from any playlist you do not like, labelled as disliked (0)
disliked_tracks = get_track_temp(
    access_token,
    spotify,
    username='mchakravarti7',
    playlist='6c4QWRle2qaQM542rtNKK3?si=oVPi36zLTymaIJ03SAjazg',
)
dataset_pt2 = get_saved_tracks(access_token, spotify, disliked_tracks, likeness=0)

In [None]:
# Stack the liked and disliked rows into one training frame with a fresh 0..n-1 index
sets = [dataset_pt1, dataset_pt2]
dataset = pandas.concat(objs=sets, ignore_index=True, sort=False)

In [None]:
# Number of rows in the combined training set (liked + disliked tracks)
len(dataset)

<h3> Building models </h3>

In [None]:
#creating the test set - input a different playlist
test_tracks = get_track_temp(
    access_token,
    spotify,
    username='mchakravarti7',
    playlist='37i9dQZF1DX0XUsuxWHRQd?si=T3RUFPkMTWqXEojQL_av7A',
)
# No likeness argument is passed, so no 'likeability' column is added
test_data = get_saved_tracks(access_token, spotify, test_tracks)

# Track ids in playlist order, kept for adding songs later
songs_to_add_temp = [item['track']['id'] for item in test_tracks]

Define training and testing data

In [None]:
from sklearn.model_selection import train_test_split

# Fixed seed so the split, and every score derived from it, is reproducible
train, test = train_test_split(dataset, test_size=0.3, random_state=42)

# Select the label by name rather than by position, so the feature columns
# always match the columns of the unlabelled test playlist frame
x_train = train.drop(columns='likeability')
y_train = train['likeability']
x_test = test.drop(columns='likeability')
y_test = test['likeability']

<b>Trying k-Nearest Neighbours</b>

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

# Features are the first 13 columns; the label is column 13
inputs = dataset.iloc[:, :13]
outputs = dataset.iloc[:, 13]

# 10-fold cross-validated search over k = 1..9
knn = KNeighborsClassifier()
param_grid = {'n_neighbors': np.arange(1, 10)}
model = GridSearchCV(knn, param_grid, cv=10)
model.fit(inputs, np.ravel(outputs))

model.best_score_, model.best_params_

In [None]:
# Use the k selected by the grid search instead of a hard-coded value,
# then report accuracy on the held-out split
real_knn = KNeighborsClassifier(n_neighbors=model.best_params_['n_neighbors'])
real_knn.fit(x_train, y_train)
r_knn = real_knn.score(x_test, y_test)
print(r_knn)

In [None]:
# Predict a likeability label (1 = liked, 0 = disliked) for every row of the test playlist
predictions = real_knn.predict(test_data)

In [None]:
# Display the predicted labels
predictions

<h3> Adding songs to playlist based on predictions </h3>

In [None]:
# Keep the ids of the tracks the classifier labelled as liked (1)
songs_to_add = [
    songs_to_add_temp[idx]
    for idx, label in enumerate(predictions)
    if label == 1
]

In [None]:
# NOTE(review): get_token is defined with five required parameters
# (username, scope, client_id, client_secret, redirect_uri); this one-argument
# placeholder call raises TypeError until real values are supplied.
access_token_2 = get_token('#')

# Second client, authenticated with the token part of the (token, scope) pair
sp = spotipy.Spotify(auth=access_token_2[0])

<h3> Another approach </h3>

In [None]:
#check if euclidean distance to the point (mean of the dataset) is within a certain range
def edist_approach(dataset, song, threshold=10000):
    """Keep a song if it lies close enough to the dataset's mean feature vector.

    Args:
        dataset: DataFrame whose last column is the label; the mean of that
            column is discarded before measuring the distance.
        song: sequence of feature values, ordered like the remaining columns.
        threshold: largest Euclidean distance that is still accepted (inclusive).

    Returns:
        ``[song]`` when the distance is at most ``threshold``, otherwise ``None``.
    """
    # Column means with the trailing label column removed
    centroid = np.asarray(dataset.mean(axis=0))[:-1]
    distance = np.linalg.norm(centroid - np.asarray(song))
    if distance <= threshold:
        return [song]
    return None

In [None]:
# Per-column means of the training set (the reference point edist_approach measures against)
val = dataset.mean(axis=0)
val

In [None]:
# get the list of songs' data in the test set (this is a list of lists)
# Every row is used, whatever the playlist length, instead of a fixed count of 50
list_of_songs = test_data.values.tolist()

# edist_approach returns [song] when the song is within range of the dataset mean, else None
final_list_of_songs = [edist_approach(dataset, elem) for elem in list_of_songs]

# 1 = within range (keep), 0 = out of range
list_of_predictions = [0 if elem is None else 1 for elem in final_list_of_songs]

list_of_predictions

In [None]:
# Ids of the tracks the distance approach marked as keep (1)
songs_to_add = [
    songs_to_add_temp[idx]
    for idx, keep in enumerate(list_of_predictions)
    if keep == 1
]

# Save in batches of at most 50 ids per request; nothing is sent when the list is empty
for start in range(0, len(songs_to_add), 50):
    spotify.current_user_saved_tracks_add(songs_to_add[start:start + 50])

<h3> Comparing suggested songs to dataset average </h3>

In [None]:
# Work on a copy so that adding the 'mean' row does not write into a slice of dataset
df = dataset.head(4).copy()
df.loc['mean'] = dataset.mean()
df