In [1]:
import pandas as pd
import numpy as np

from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, MinMaxScaler


from sklearn.metrics import silhouette_samples, silhouette_score
from pathlib import Path  



In [2]:
#read in file for KNN_top_tracks DataFrame
file1 = 'resources/tracks_df.csv'
KNN_top_tracks = pd.read_csv(file1)

In [3]:
KNN_top_tracks.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67,128787,0,['Sam Cooke'],1960-02-01,0.686,0.672,11,-5.523,1,0.0323,0.7,0.0,0.135,0.857,128.55
1,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64,119360,0,['Eddie Cochran'],1960-05-01,0.714,0.886,11,-8.629,0,0.0554,0.116,0.184,0.18,0.954,156.351
2,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63,139200,0,['Neil Sedaka'],1960-12-30,0.743,0.799,8,-5.466,0,0.0375,0.699,0.0,0.0635,0.965,116.112
3,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61,217415,0,['Ray Charles'],1960-09-01,0.138,0.399,7,-8.756,1,0.0311,0.782,4e-06,0.188,0.296,179.93
4,0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60,165760,0,"['Dinah Washington', 'Brook Benton']",1960-07-05,0.67,0.596,3,-9.347,1,0.0627,0.852,0.00203,0.653,0.813,133.396


In [4]:
KNN_top_tracks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 509268 entries, 0 to 509267
Data columns (total 18 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   id                509268 non-null  object 
 1   name              509268 non-null  object 
 2   popularity        509268 non-null  int64  
 3   duration_ms       509268 non-null  int64  
 4   explicit          509268 non-null  int64  
 5   artists           509268 non-null  object 
 6   release_date      509268 non-null  object 
 7   danceability      509268 non-null  float64
 8   energy            509268 non-null  float64
 9   key               509268 non-null  int64  
 10  loudness          509268 non-null  float64
 11  mode              509268 non-null  int64  
 12  speechiness       509268 non-null  float64
 13  acousticness      509268 non-null  float64
 14  instrumentalness  509268 non-null  float64
 15  liveness          509268 non-null  float64
 16  valence           50

In [5]:
# Read in file for KNN_top_tracks_alpha DataFrame

KNN_top_tracks_alpha = pd.read_csv(file1)

In [6]:
KNN_top_tracks_alpha.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67,128787,0,['Sam Cooke'],1960-02-01,0.686,0.672,11,-5.523,1,0.0323,0.7,0.0,0.135,0.857,128.55
1,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64,119360,0,['Eddie Cochran'],1960-05-01,0.714,0.886,11,-8.629,0,0.0554,0.116,0.184,0.18,0.954,156.351
2,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63,139200,0,['Neil Sedaka'],1960-12-30,0.743,0.799,8,-5.466,0,0.0375,0.699,0.0,0.0635,0.965,116.112
3,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61,217415,0,['Ray Charles'],1960-09-01,0.138,0.399,7,-8.756,1,0.0311,0.782,4e-06,0.188,0.296,179.93
4,0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60,165760,0,"['Dinah Washington', 'Brook Benton']",1960-07-05,0.67,0.596,3,-9.347,1,0.0627,0.852,0.00203,0.653,0.813,133.396


In [7]:
KNN_top_tracks_alpha.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 509268 entries, 0 to 509267
Data columns (total 18 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   id                509268 non-null  object 
 1   name              509268 non-null  object 
 2   popularity        509268 non-null  int64  
 3   duration_ms       509268 non-null  int64  
 4   explicit          509268 non-null  int64  
 5   artists           509268 non-null  object 
 6   release_date      509268 non-null  object 
 7   danceability      509268 non-null  float64
 8   energy            509268 non-null  float64
 9   key               509268 non-null  int64  
 10  loudness          509268 non-null  float64
 11  mode              509268 non-null  int64  
 12  speechiness       509268 non-null  float64
 13  acousticness      509268 non-null  float64
 14  instrumentalness  509268 non-null  float64
 15  liveness          509268 non-null  float64
 16  valence           50

In [8]:
# Drop Duplicate Song Names

KNN_top_tracks=KNN_top_tracks.drop_duplicates(subset=['name'])

In [9]:
# Change the index of the DataFrame to the song name

KNN_top_tracks.index = KNN_top_tracks['name']


In [10]:
# Set up KNN_top_tracks_proc where the data will be processed.
# Drop the release date column.

KNN_top_tracks_proc=KNN_top_tracks.drop(['release_date'], axis=1)

In [11]:
KNN_top_tracks_proc

Unnamed: 0_level_0,id,name,popularity,duration_ms,explicit,artists,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
(What A) Wonderful World - Mono,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67,128787,0,['Sam Cooke'],0.686,0.6720,11,-5.523,1,0.0323,0.700,0.000000,0.1350,0.8570,128.550
Summertime Blues,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64,119360,0,['Eddie Cochran'],0.714,0.8860,11,-8.629,0,0.0554,0.116,0.184000,0.1800,0.9540,156.351
Breaking Up Is Hard to Do,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63,139200,0,['Neil Sedaka'],0.743,0.7990,8,-5.466,0,0.0375,0.699,0.000000,0.0635,0.9650,116.112
Georgia on My Mind - Original Master Recording,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61,217415,0,['Ray Charles'],0.138,0.3990,7,-8.756,1,0.0311,0.782,0.000004,0.1880,0.2960,179.930
Baby (You've Got What It Takes),0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60,165760,0,"['Dinah Washington', 'Brook Benton']",0.670,0.5960,3,-9.347,1,0.0627,0.852,0.002030,0.6530,0.8130,133.396
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
John Brown's Song,0SjsIzJkZfDU7wlcdklEFR,John Brown's Song,66,185250,0,['Gregory Oberle'],0.562,0.0331,1,-25.551,1,0.1030,0.996,0.961000,0.1110,0.3860,63.696
云与海,5rgu12WBIHQtvej2MdHSH0,云与海,50,258267,0,['阿YueYue'],0.560,0.5180,0,-7.471,0,0.0292,0.785,0.000000,0.0648,0.2110,131.896
blind,0NuWgxEp51CutD2pJoF4OM,blind,72,153293,0,['ROLE MODEL'],0.765,0.6630,0,-5.223,1,0.0652,0.141,0.000297,0.0924,0.6860,150.091
What They'll Say About Us,27Y1N4Q4U3EfDU5Ubw8ws2,What They'll Say About Us,70,187601,0,['FINNEAS'],0.535,0.3140,7,-12.823,0,0.0408,0.895,0.000150,0.0874,0.0663,145.095


In [12]:
KNN_top_tracks_proc.info()

<class 'pandas.core.frame.DataFrame'>
Index: 391661 entries, (What A) Wonderful World - Mono to A Day At A Time
Data columns (total 17 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   id                391661 non-null  object 
 1   name              391661 non-null  object 
 2   popularity        391661 non-null  int64  
 3   duration_ms       391661 non-null  int64  
 4   explicit          391661 non-null  int64  
 5   artists           391661 non-null  object 
 6   danceability      391661 non-null  float64
 7   energy            391661 non-null  float64
 8   key               391661 non-null  int64  
 9   loudness          391661 non-null  float64
 10  mode              391661 non-null  int64  
 11  speechiness       391661 non-null  float64
 12  acousticness      391661 non-null  float64
 13  instrumentalness  391661 non-null  float64
 14  liveness          391661 non-null  float64
 15  valence           391661 non-null 

In [13]:
# Change all the metrics to float

KNN_top_tracks_proc["popularity"] = KNN_top_tracks_proc["popularity"].astype(float)
KNN_top_tracks_proc["duration_ms"] = KNN_top_tracks_proc["duration_ms"].astype(float)
KNN_top_tracks_proc["explicit"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["energy"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["key"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["mode"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["speechiness"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["acousticness"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["instrumentalness"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["valence"] = KNN_top_tracks_proc["explicit"].astype(float)

In [14]:
KNN_top_tracks_proc.head()

Unnamed: 0_level_0,id,name,popularity,duration_ms,explicit,artists,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
(What A) Wonderful World - Mono,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67.0,128787.0,0.0,['Sam Cooke'],0.686,0.0,0.0,-5.523,0.0,0.0,0.0,0.0,0.135,0.0,128.55
Summertime Blues,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64.0,119360.0,0.0,['Eddie Cochran'],0.714,0.0,0.0,-8.629,0.0,0.0,0.0,0.0,0.18,0.0,156.351
Breaking Up Is Hard to Do,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63.0,139200.0,0.0,['Neil Sedaka'],0.743,0.0,0.0,-5.466,0.0,0.0,0.0,0.0,0.0635,0.0,116.112
Georgia on My Mind - Original Master Recording,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61.0,217415.0,0.0,['Ray Charles'],0.138,0.0,0.0,-8.756,0.0,0.0,0.0,0.0,0.188,0.0,179.93
Baby (You've Got What It Takes),0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60.0,165760.0,0.0,"['Dinah Washington', 'Brook Benton']",0.67,0.0,0.0,-9.347,0.0,0.0,0.0,0.0,0.653,0.0,133.396


In [15]:
KNN_top_tracks_proc.info()

<class 'pandas.core.frame.DataFrame'>
Index: 391661 entries, (What A) Wonderful World - Mono to A Day At A Time
Data columns (total 17 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   id                391661 non-null  object 
 1   name              391661 non-null  object 
 2   popularity        391661 non-null  float64
 3   duration_ms       391661 non-null  float64
 4   explicit          391661 non-null  float64
 5   artists           391661 non-null  object 
 6   danceability      391661 non-null  float64
 7   energy            391661 non-null  float64
 8   key               391661 non-null  float64
 9   loudness          391661 non-null  float64
 10  mode              391661 non-null  float64
 11  speechiness       391661 non-null  float64
 12  acousticness      391661 non-null  float64
 13  instrumentalness  391661 non-null  float64
 14  liveness          391661 non-null  float64
 15  valence           391661 non-null 

In [19]:
# Use the standard scaler on the metrics

KNN_top_tracks_proc[['popularity', 'danceability', 'loudness', 'tempo', 'energy', 'key', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'instrumentalness', 'valence']]=StandardScaler().fit_transform(KNN_top_tracks_proc[['popularity', 'danceability', 'loudness', 'tempo', 'energy', 'key', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'instrumentalness', 'valence']])



In [None]:
KNN_top_tracks_proc.head()

In [None]:
# Drop the id, name and artist columns as they cannot be processed in the NearestNeighbors algorithm.

KNN_top_tracks_proc = KNN_top_tracks_proc.drop(['name','artists', 'id'], axis=1)

In [None]:
KNN_top_tracks_proc.head()

In [None]:
KNN_top_tracks_proc.info()

In [None]:
# Establish model using the NearestNeighbors algorithm

model_knn = NearestNeighbors(metric='cosine', algorithm = 'brute', n_neighbors = 10)


In [None]:
# Fit the KNN_top_tracks_proc with the model

model_knn.fit(KNN_top_tracks_proc)

In [None]:
# Input song

song = input('Enter song title: ')

In [None]:
song

In [None]:
# Extract distance and indices values from the model

distances, indices = model_knn.kneighbors(KNN_top_tracks_proc.loc[song].to_numpy().reshape(1,-1), 11)
    

In [None]:
distances

In [None]:
indices[0]

In [None]:
# Create recommender function

def recommender(song, model_knn, KNN_top_tracks_alpha, KNN_top_tracks_proc):
    
    
    distances, indices = model_knn.kneighbors(KNN_top_tracks_proc.loc[song].to_numpy().reshape(1,-1), 11)
        
    
    
    for i in indices:
        return (KNN_top_tracks_alpha[['name','artists']].loc[i].loc[KNN_top_tracks_alpha['name']!=song])

In [None]:
Run recommender function

recommender(song, model_knn, KNN_top_tracks_alpha, KNN_top_tracks_proc)