In [1]:
import pandas as pd
import numpy as np

from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, MinMaxScaler


from sklearn.metrics import silhouette_samples, silhouette_score
from pathlib import Path  



In [2]:
# read in file
file1 = 'resources/tracks_df.csv'
tracks_df = pd.read_csv(file1)

In [3]:
tracks_df.head(5)

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67,128787,0,['Sam Cooke'],1960-02-01,0.686,0.672,11,-5.523,1,0.0323,0.7,0.0,0.135,0.857,128.55
1,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64,119360,0,['Eddie Cochran'],1960-05-01,0.714,0.886,11,-8.629,0,0.0554,0.116,0.184,0.18,0.954,156.351
2,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63,139200,0,['Neil Sedaka'],1960-12-30,0.743,0.799,8,-5.466,0,0.0375,0.699,0.0,0.0635,0.965,116.112
3,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61,217415,0,['Ray Charles'],1960-09-01,0.138,0.399,7,-8.756,1,0.0311,0.782,4e-06,0.188,0.296,179.93
4,0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60,165760,0,"['Dinah Washington', 'Brook Benton']",1960-07-05,0.67,0.596,3,-9.347,1,0.0627,0.852,0.00203,0.653,0.813,133.396


In [4]:
tracks_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 509268 entries, 0 to 509267
Data columns (total 18 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   id                509268 non-null  object 
 1   name              509268 non-null  object 
 2   popularity        509268 non-null  int64  
 3   duration_ms       509268 non-null  int64  
 4   explicit          509268 non-null  int64  
 5   artists           509268 non-null  object 
 6   release_date      509268 non-null  object 
 7   danceability      509268 non-null  float64
 8   energy            509268 non-null  float64
 9   key               509268 non-null  int64  
 10  loudness          509268 non-null  float64
 11  mode              509268 non-null  int64  
 12  speechiness       509268 non-null  float64
 13  acousticness      509268 non-null  float64
 14  instrumentalness  509268 non-null  float64
 15  liveness          509268 non-null  float64
 16  valence           50

In [5]:
tracks_df.shape[0]

509268

In [6]:
tracks_df=tracks_df.drop_duplicates(subset=['name'])

In [7]:
tracks_df.shape[0]

391661

In [8]:
tracks_df.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67,128787,0,['Sam Cooke'],1960-02-01,0.686,0.672,11,-5.523,1,0.0323,0.7,0.0,0.135,0.857,128.55
1,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64,119360,0,['Eddie Cochran'],1960-05-01,0.714,0.886,11,-8.629,0,0.0554,0.116,0.184,0.18,0.954,156.351
2,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63,139200,0,['Neil Sedaka'],1960-12-30,0.743,0.799,8,-5.466,0,0.0375,0.699,0.0,0.0635,0.965,116.112
3,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61,217415,0,['Ray Charles'],1960-09-01,0.138,0.399,7,-8.756,1,0.0311,0.782,4e-06,0.188,0.296,179.93
4,0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60,165760,0,"['Dinah Washington', 'Brook Benton']",1960-07-05,0.67,0.596,3,-9.347,1,0.0627,0.852,0.00203,0.653,0.813,133.396


In [9]:
top_tracks_df = tracks_df[tracks_df["popularity"]>= 80]

In [10]:
top_tracks_df.head()


Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
4382,745H5CctFr12Mo7cqa1BMH,My Girl,80,165000,0,['The Temptations'],1965-03-22,0.572,0.418,0,-10.738,1,0.0349,0.635,0.0,0.0961,0.694,104.566
6382,7tqhbajSfrz2F7E1Z75ASX,Ain't No Mountain High Enough,82,151667,0,"['Marvin Gaye', 'Tammi Terrell']",1967-08-29,0.663,0.6,7,-10.87,1,0.032,0.43,0.0,0.184,0.8,129.991
6383,3yrSvpt2l1xhsV9Em88Pul,Brown Eyed Girl,80,183307,0,['Van Morrison'],1967-09-01,0.491,0.583,7,-10.964,1,0.0376,0.182,0.0,0.407,0.907,150.572
8372,6dGnYIeXmHdcikdzNNDMm2,Here Comes The Sun - Remastered 2009,83,185733,0,['The Beatles'],1969-09-26,0.557,0.54,9,-10.484,1,0.0347,0.0339,0.00248,0.179,0.394,129.171
8373,4BP3uh0hFLFRb5cjsgLqDh,Fortunate Son,83,140773,0,['Creedence Clearwater Revival'],1969-11-02,0.64,0.663,0,-7.516,1,0.0374,0.201,0.00806,0.152,0.663,132.77


In [11]:
filepath = Path('Resources/KNN_top_tracks.csv')  
filepath.parent.mkdir(parents=True, exist_ok=True)  
top_tracks_df.to_csv(filepath)  


In [12]:
file2 = 'resources/KNN_top_tracks.csv'
KNN_top_tracks = tracks_df

In [13]:
KNN_top_tracks.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67,128787,0,['Sam Cooke'],1960-02-01,0.686,0.672,11,-5.523,1,0.0323,0.7,0.0,0.135,0.857,128.55
1,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64,119360,0,['Eddie Cochran'],1960-05-01,0.714,0.886,11,-8.629,0,0.0554,0.116,0.184,0.18,0.954,156.351
2,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63,139200,0,['Neil Sedaka'],1960-12-30,0.743,0.799,8,-5.466,0,0.0375,0.699,0.0,0.0635,0.965,116.112
3,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61,217415,0,['Ray Charles'],1960-09-01,0.138,0.399,7,-8.756,1,0.0311,0.782,4e-06,0.188,0.296,179.93
4,0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60,165760,0,"['Dinah Washington', 'Brook Benton']",1960-07-05,0.67,0.596,3,-9.347,1,0.0627,0.852,0.00203,0.653,0.813,133.396


In [14]:
KNN_top_tracks_alpha = pd.read_csv(file1)

In [15]:
KNN_top_tracks_alpha.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67,128787,0,['Sam Cooke'],1960-02-01,0.686,0.672,11,-5.523,1,0.0323,0.7,0.0,0.135,0.857,128.55
1,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64,119360,0,['Eddie Cochran'],1960-05-01,0.714,0.886,11,-8.629,0,0.0554,0.116,0.184,0.18,0.954,156.351
2,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63,139200,0,['Neil Sedaka'],1960-12-30,0.743,0.799,8,-5.466,0,0.0375,0.699,0.0,0.0635,0.965,116.112
3,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61,217415,0,['Ray Charles'],1960-09-01,0.138,0.399,7,-8.756,1,0.0311,0.782,4e-06,0.188,0.296,179.93
4,0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60,165760,0,"['Dinah Washington', 'Brook Benton']",1960-07-05,0.67,0.596,3,-9.347,1,0.0627,0.852,0.00203,0.653,0.813,133.396


In [16]:
#KNN_top_tracks = KNN_top_tracks.drop(['Unnamed: 0'], axis=1)

In [17]:
KNN_top_tracks.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 391661 entries, 0 to 509266
Data columns (total 18 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   id                391661 non-null  object 
 1   name              391661 non-null  object 
 2   popularity        391661 non-null  int64  
 3   duration_ms       391661 non-null  int64  
 4   explicit          391661 non-null  int64  
 5   artists           391661 non-null  object 
 6   release_date      391661 non-null  object 
 7   danceability      391661 non-null  float64
 8   energy            391661 non-null  float64
 9   key               391661 non-null  int64  
 10  loudness          391661 non-null  float64
 11  mode              391661 non-null  int64  
 12  speechiness       391661 non-null  float64
 13  acousticness      391661 non-null  float64
 14  instrumentalness  391661 non-null  float64
 15  liveness          391661 non-null  float64
 16  valence           39

In [18]:
KNN_top_tracks.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67,128787,0,['Sam Cooke'],1960-02-01,0.686,0.672,11,-5.523,1,0.0323,0.7,0.0,0.135,0.857,128.55
1,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64,119360,0,['Eddie Cochran'],1960-05-01,0.714,0.886,11,-8.629,0,0.0554,0.116,0.184,0.18,0.954,156.351
2,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63,139200,0,['Neil Sedaka'],1960-12-30,0.743,0.799,8,-5.466,0,0.0375,0.699,0.0,0.0635,0.965,116.112
3,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61,217415,0,['Ray Charles'],1960-09-01,0.138,0.399,7,-8.756,1,0.0311,0.782,4e-06,0.188,0.296,179.93
4,0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60,165760,0,"['Dinah Washington', 'Brook Benton']",1960-07-05,0.67,0.596,3,-9.347,1,0.0627,0.852,0.00203,0.653,0.813,133.396


In [19]:
KNN_top_tracks_index=KNN_top_tracks
KNN_top_tracks_index.index = KNN_top_tracks_index['name']


In [20]:
KNN_top_tracks_index.head()

Unnamed: 0_level_0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
(What A) Wonderful World - Mono,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67,128787,0,['Sam Cooke'],1960-02-01,0.686,0.672,11,-5.523,1,0.0323,0.7,0.0,0.135,0.857,128.55
Summertime Blues,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64,119360,0,['Eddie Cochran'],1960-05-01,0.714,0.886,11,-8.629,0,0.0554,0.116,0.184,0.18,0.954,156.351
Breaking Up Is Hard to Do,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63,139200,0,['Neil Sedaka'],1960-12-30,0.743,0.799,8,-5.466,0,0.0375,0.699,0.0,0.0635,0.965,116.112
Georgia on My Mind - Original Master Recording,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61,217415,0,['Ray Charles'],1960-09-01,0.138,0.399,7,-8.756,1,0.0311,0.782,4e-06,0.188,0.296,179.93
Baby (You've Got What It Takes),0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60,165760,0,"['Dinah Washington', 'Brook Benton']",1960-07-05,0.67,0.596,3,-9.347,1,0.0627,0.852,0.00203,0.653,0.813,133.396


In [21]:
KNN_top_tracks_index.drop(['name'], axis=1)

Unnamed: 0_level_0,id,popularity,duration_ms,explicit,artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
(What A) Wonderful World - Mono,2G0GextMwZJLkNxcSZ7ZJ3,67,128787,0,['Sam Cooke'],1960-02-01,0.686,0.6720,11,-5.523,1,0.0323,0.700,0.000000,0.1350,0.8570,128.550
Summertime Blues,3oAWTk92mZBxKBOKf8mR5v,64,119360,0,['Eddie Cochran'],1960-05-01,0.714,0.8860,11,-8.629,0,0.0554,0.116,0.184000,0.1800,0.9540,156.351
Breaking Up Is Hard to Do,2x6pbpjVGjiWCcH89IK8AX,63,139200,0,['Neil Sedaka'],1960-12-30,0.743,0.7990,8,-5.466,0,0.0375,0.699,0.000000,0.0635,0.9650,116.112
Georgia on My Mind - Original Master Recording,47mA6f44zxLtdATOoY7GjN,61,217415,0,['Ray Charles'],1960-09-01,0.138,0.3990,7,-8.756,1,0.0311,0.782,0.000004,0.1880,0.2960,179.930
Baby (You've Got What It Takes),0DICNd5XQ1og9UeYzxoNFV,60,165760,0,"['Dinah Washington', 'Brook Benton']",1960-07-05,0.670,0.5960,3,-9.347,1,0.0627,0.852,0.002030,0.6530,0.8130,133.396
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
John Brown's Song,0SjsIzJkZfDU7wlcdklEFR,66,185250,0,['Gregory Oberle'],2020-03-20,0.562,0.0331,1,-25.551,1,0.1030,0.996,0.961000,0.1110,0.3860,63.696
云与海,5rgu12WBIHQtvej2MdHSH0,50,258267,0,['阿YueYue'],2020-09-26,0.560,0.5180,0,-7.471,0,0.0292,0.785,0.000000,0.0648,0.2110,131.896
blind,0NuWgxEp51CutD2pJoF4OM,72,153293,0,['ROLE MODEL'],2020-10-21,0.765,0.6630,0,-5.223,1,0.0652,0.141,0.000297,0.0924,0.6860,150.091
What They'll Say About Us,27Y1N4Q4U3EfDU5Ubw8ws2,70,187601,0,['FINNEAS'],2020-09-02,0.535,0.3140,7,-12.823,0,0.0408,0.895,0.000150,0.0874,0.0663,145.095


In [22]:
#tracks_index_df = tracks_df

In [23]:
#tracks_index_df.head()

In [24]:
#tracks_df2 = tracks_df
#top_tracks_df2 = tracks_df2

In [25]:
#tracks_df2.head()

In [26]:
#tracks_df.index = tracks_df['id']

In [27]:
#tracks_df = tracks_df.drop(['id'], axis=1)

In [28]:
#tracks_df.head()

In [29]:
#top_tracks_df = tracks_df

In [30]:
#top_tracks_df.head()

In [31]:
KNN_top_tracks_proc=KNN_top_tracks.drop(['release_date'], axis=1)
#.drop(['energy','key','mode','speechiness','acousticness','instrumentalness','liveness', 'valence', 'release_date'], axis=1)

In [32]:
KNN_top_tracks_proc

Unnamed: 0_level_0,id,name,popularity,duration_ms,explicit,artists,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
(What A) Wonderful World - Mono,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67,128787,0,['Sam Cooke'],0.686,0.6720,11,-5.523,1,0.0323,0.700,0.000000,0.1350,0.8570,128.550
Summertime Blues,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64,119360,0,['Eddie Cochran'],0.714,0.8860,11,-8.629,0,0.0554,0.116,0.184000,0.1800,0.9540,156.351
Breaking Up Is Hard to Do,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63,139200,0,['Neil Sedaka'],0.743,0.7990,8,-5.466,0,0.0375,0.699,0.000000,0.0635,0.9650,116.112
Georgia on My Mind - Original Master Recording,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61,217415,0,['Ray Charles'],0.138,0.3990,7,-8.756,1,0.0311,0.782,0.000004,0.1880,0.2960,179.930
Baby (You've Got What It Takes),0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60,165760,0,"['Dinah Washington', 'Brook Benton']",0.670,0.5960,3,-9.347,1,0.0627,0.852,0.002030,0.6530,0.8130,133.396
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
John Brown's Song,0SjsIzJkZfDU7wlcdklEFR,John Brown's Song,66,185250,0,['Gregory Oberle'],0.562,0.0331,1,-25.551,1,0.1030,0.996,0.961000,0.1110,0.3860,63.696
云与海,5rgu12WBIHQtvej2MdHSH0,云与海,50,258267,0,['阿YueYue'],0.560,0.5180,0,-7.471,0,0.0292,0.785,0.000000,0.0648,0.2110,131.896
blind,0NuWgxEp51CutD2pJoF4OM,blind,72,153293,0,['ROLE MODEL'],0.765,0.6630,0,-5.223,1,0.0652,0.141,0.000297,0.0924,0.6860,150.091
What They'll Say About Us,27Y1N4Q4U3EfDU5Ubw8ws2,What They'll Say About Us,70,187601,0,['FINNEAS'],0.535,0.3140,7,-12.823,0,0.0408,0.895,0.000150,0.0874,0.0663,145.095


In [33]:
KNN_top_tracks_proc.info()

<class 'pandas.core.frame.DataFrame'>
Index: 391661 entries, (What A) Wonderful World - Mono to A Day At A Time
Data columns (total 17 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   id                391661 non-null  object 
 1   name              391661 non-null  object 
 2   popularity        391661 non-null  int64  
 3   duration_ms       391661 non-null  int64  
 4   explicit          391661 non-null  int64  
 5   artists           391661 non-null  object 
 6   danceability      391661 non-null  float64
 7   energy            391661 non-null  float64
 8   key               391661 non-null  int64  
 9   loudness          391661 non-null  float64
 10  mode              391661 non-null  int64  
 11  speechiness       391661 non-null  float64
 12  acousticness      391661 non-null  float64
 13  instrumentalness  391661 non-null  float64
 14  liveness          391661 non-null  float64
 15  valence           391661 non-null 

In [34]:
#proctop_tracks_df = top_tracks_df.drop(['name','artists'], axis=1)

In [35]:
#top_tracks_df.info()

In [36]:
#top_tracks_df.head()

In [37]:
KNN_top_tracks_proc["popularity"] = KNN_top_tracks_proc["popularity"].astype(float)
KNN_top_tracks_proc["duration_ms"] = KNN_top_tracks_proc["duration_ms"].astype(float)
KNN_top_tracks_proc["explicit"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["energy"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["key"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["mode"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["speechiness"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["acousticness"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["instrumentalness"] = KNN_top_tracks_proc["explicit"].astype(float)
KNN_top_tracks_proc["valence"] = KNN_top_tracks_proc["explicit"].astype(float)

#'energy','key','mode','speechiness','acousticness','instrumentalness','liveness', 'valence'

In [38]:
KNN_top_tracks_proc.info()

<class 'pandas.core.frame.DataFrame'>
Index: 391661 entries, (What A) Wonderful World - Mono to A Day At A Time
Data columns (total 17 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   id                391661 non-null  object 
 1   name              391661 non-null  object 
 2   popularity        391661 non-null  float64
 3   duration_ms       391661 non-null  float64
 4   explicit          391661 non-null  float64
 5   artists           391661 non-null  object 
 6   danceability      391661 non-null  float64
 7   energy            391661 non-null  float64
 8   key               391661 non-null  float64
 9   loudness          391661 non-null  float64
 10  mode              391661 non-null  float64
 11  speechiness       391661 non-null  float64
 12  acousticness      391661 non-null  float64
 13  instrumentalness  391661 non-null  float64
 14  liveness          391661 non-null  float64
 15  valence           391661 non-null 

In [39]:
KNN_top_tracks_proc.columns

Index(['id', 'name', 'popularity', 'duration_ms', 'explicit', 'artists',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo'],
      dtype='object')

In [40]:
KNN_top_tracks_proc.head()

Unnamed: 0_level_0,id,name,popularity,duration_ms,explicit,artists,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
(What A) Wonderful World - Mono,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67.0,128787.0,0.0,['Sam Cooke'],0.686,0.0,0.0,-5.523,0.0,0.0,0.0,0.0,0.135,0.0,128.55
Summertime Blues,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64.0,119360.0,0.0,['Eddie Cochran'],0.714,0.0,0.0,-8.629,0.0,0.0,0.0,0.0,0.18,0.0,156.351
Breaking Up Is Hard to Do,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63.0,139200.0,0.0,['Neil Sedaka'],0.743,0.0,0.0,-5.466,0.0,0.0,0.0,0.0,0.0635,0.0,116.112
Georgia on My Mind - Original Master Recording,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61.0,217415.0,0.0,['Ray Charles'],0.138,0.0,0.0,-8.756,0.0,0.0,0.0,0.0,0.188,0.0,179.93
Baby (You've Got What It Takes),0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60.0,165760.0,0.0,"['Dinah Washington', 'Brook Benton']",0.67,0.0,0.0,-9.347,0.0,0.0,0.0,0.0,0.653,0.0,133.396


In [41]:
KNN_top_tracks_proc[['popularity', 'danceability', 'loudness', 'tempo', 'energy', 'key', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'instrumentalness', 'valence']]= StandardScaler().fit_transform(KNN_top_tracks_proc
    [['popularity', 'danceability', 'loudness', 'tempo', 'energy', 'key', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'instrumentalness', 'valence']])

#'energy','key','mode','speechiness','acousticness','instrumentalness','liveness', 'valence'

In [42]:
KNN_top_tracks_proc.head()

Unnamed: 0_level_0,id,name,popularity,duration_ms,explicit,artists,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
(What A) Wonderful World - Mono,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,2.154349,128787.0,0.0,['Sam Cooke'],0.689451,-0.234264,-0.234264,0.879065,-0.234264,-0.234264,-0.234264,-0.234264,0.135,-0.234264,0.306697
Summertime Blues,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,1.977253,119360.0,0.0,['Eddie Cochran'],0.86038,-0.234264,-0.234264,0.227086,-0.234264,-0.234264,-0.234264,-0.234264,0.18,-0.234264,1.253152
Breaking Up Is Hard to Do,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,1.918221,139200.0,0.0,['Neil Sedaka'],1.037413,-0.234264,-0.234264,0.89103,-0.234264,-0.234264,-0.234264,-0.234264,0.0635,-0.234264,-0.116741
Georgia on My Mind - Original Master Recording,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,1.800158,217415.0,0.0,['Ray Charles'],-2.655868,-0.234264,-0.234264,0.200428,-0.234264,-0.234264,-0.234264,-0.234264,0.188,-0.234264,2.055873
Baby (You've Got What It Takes),0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),1.741126,165760.0,0.0,"['Dinah Washington', 'Brook Benton']",0.591778,-0.234264,-0.234264,0.076371,-0.234264,-0.234264,-0.234264,-0.234264,0.653,-0.234264,0.471674


In [43]:
KNN_top_tracks_proc = KNN_top_tracks_proc.drop(['name','artists'], axis=1)

In [44]:
KNN_top_tracks_proc.head()

Unnamed: 0_level_0,id,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
(What A) Wonderful World - Mono,2G0GextMwZJLkNxcSZ7ZJ3,2.154349,128787.0,0.0,0.689451,-0.234264,-0.234264,0.879065,-0.234264,-0.234264,-0.234264,-0.234264,0.135,-0.234264,0.306697
Summertime Blues,3oAWTk92mZBxKBOKf8mR5v,1.977253,119360.0,0.0,0.86038,-0.234264,-0.234264,0.227086,-0.234264,-0.234264,-0.234264,-0.234264,0.18,-0.234264,1.253152
Breaking Up Is Hard to Do,2x6pbpjVGjiWCcH89IK8AX,1.918221,139200.0,0.0,1.037413,-0.234264,-0.234264,0.89103,-0.234264,-0.234264,-0.234264,-0.234264,0.0635,-0.234264,-0.116741
Georgia on My Mind - Original Master Recording,47mA6f44zxLtdATOoY7GjN,1.800158,217415.0,0.0,-2.655868,-0.234264,-0.234264,0.200428,-0.234264,-0.234264,-0.234264,-0.234264,0.188,-0.234264,2.055873
Baby (You've Got What It Takes),0DICNd5XQ1og9UeYzxoNFV,1.741126,165760.0,0.0,0.591778,-0.234264,-0.234264,0.076371,-0.234264,-0.234264,-0.234264,-0.234264,0.653,-0.234264,0.471674


In [45]:
KNN_top_tracks_proc.info()

<class 'pandas.core.frame.DataFrame'>
Index: 391661 entries, (What A) Wonderful World - Mono to A Day At A Time
Data columns (total 15 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   id                391661 non-null  object 
 1   popularity        391661 non-null  float64
 2   duration_ms       391661 non-null  float64
 3   explicit          391661 non-null  float64
 4   danceability      391661 non-null  float64
 5   energy            391661 non-null  float64
 6   key               391661 non-null  float64
 7   loudness          391661 non-null  float64
 8   mode              391661 non-null  float64
 9   speechiness       391661 non-null  float64
 10  acousticness      391661 non-null  float64
 11  instrumentalness  391661 non-null  float64
 12  liveness          391661 non-null  float64
 13  valence           391661 non-null  float64
 14  tempo             391661 non-null  float64
dtypes: float64(14), object(1)
memory u

In [46]:
#KNN_top_tracks_proc.index = KNN_top_tracks_proc['id']


In [47]:
KNN_top_tracks_proc = KNN_top_tracks_proc.drop(['id'], axis=1)

In [48]:
KNN_top_tracks_proc.head()

Unnamed: 0_level_0,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
(What A) Wonderful World - Mono,2.154349,128787.0,0.0,0.689451,-0.234264,-0.234264,0.879065,-0.234264,-0.234264,-0.234264,-0.234264,0.135,-0.234264,0.306697
Summertime Blues,1.977253,119360.0,0.0,0.86038,-0.234264,-0.234264,0.227086,-0.234264,-0.234264,-0.234264,-0.234264,0.18,-0.234264,1.253152
Breaking Up Is Hard to Do,1.918221,139200.0,0.0,1.037413,-0.234264,-0.234264,0.89103,-0.234264,-0.234264,-0.234264,-0.234264,0.0635,-0.234264,-0.116741
Georgia on My Mind - Original Master Recording,1.800158,217415.0,0.0,-2.655868,-0.234264,-0.234264,0.200428,-0.234264,-0.234264,-0.234264,-0.234264,0.188,-0.234264,2.055873
Baby (You've Got What It Takes),1.741126,165760.0,0.0,0.591778,-0.234264,-0.234264,0.076371,-0.234264,-0.234264,-0.234264,-0.234264,0.653,-0.234264,0.471674


In [49]:
model_knn = NearestNeighbors(metric='cosine', algorithm = 'brute', n_neighbors = 10)


In [50]:
model_knn.fit(KNN_top_tracks_proc)

NearestNeighbors(algorithm='brute', metric='cosine', n_neighbors=10)

In [51]:
KNN_top_tracks_index.tail()

Unnamed: 0_level_0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
John Brown's Song,0SjsIzJkZfDU7wlcdklEFR,John Brown's Song,66,185250,0,['Gregory Oberle'],2020-03-20,0.562,0.0331,1,-25.551,1,0.103,0.996,0.961,0.111,0.386,63.696
云与海,5rgu12WBIHQtvej2MdHSH0,云与海,50,258267,0,['阿YueYue'],2020-09-26,0.56,0.518,0,-7.471,0,0.0292,0.785,0.0,0.0648,0.211,131.896
blind,0NuWgxEp51CutD2pJoF4OM,blind,72,153293,0,['ROLE MODEL'],2020-10-21,0.765,0.663,0,-5.223,1,0.0652,0.141,0.000297,0.0924,0.686,150.091
What They'll Say About Us,27Y1N4Q4U3EfDU5Ubw8ws2,What They'll Say About Us,70,187601,0,['FINNEAS'],2020-09-02,0.535,0.314,7,-12.823,0,0.0408,0.895,0.00015,0.0874,0.0663,145.095
A Day At A Time,45XJsGpFTyzbzeWK8VzR8S,A Day At A Time,58,142003,0,"['Gentle Bones', 'Clara Benin']",2021-03-05,0.696,0.615,10,-6.212,1,0.0345,0.206,3e-06,0.305,0.438,90.029


In [52]:
#song = KNN_top_tracks_index['name'].loc[['Brown Eyed Girl']]

In [95]:
song = input('Enter song title: ')

Enter song title: Breaking Up Is Hard to Do


In [96]:
idx = KNN_top_tracks_index['id'].loc[['Teeth']]

In [97]:
song

'Breaking Up Is Hard to Do'

In [98]:
idx

name
Teeth    26wLOs3ZuHJa2Ihhx6QIE6
Name: id, dtype: object

In [99]:
distances, indices = model_knn.kneighbors(KNN_top_tracks_proc.loc[song].to_numpy().reshape(1,-1), 11)
    

  "X does not have valid feature names, but"


In [100]:
distances

array([[0.00000000e+00, 1.61304303e-12, 1.69519954e-12, 1.84041671e-12,
        1.88127292e-12, 1.91946459e-12, 2.05613304e-12, 2.11575202e-12,
        2.18325358e-12, 2.23665531e-12, 2.28572716e-12]])

In [101]:
indices[0]

array([     2,  40402, 188267, 374831,  43528, 105059, 198386,  41014,
       226942,  51430,  42656], dtype=int64)

In [102]:
KNN_top_tracks_alpha[['name','artists']].loc[indices[0]].loc[KNN_top_tracks_alpha['name']!=song]

Unnamed: 0,name,artists
40402,Fell In Love With a Girl,['The White Stripes']
188267,Mano Lova,['Pikaso']
374831,Fica à Vontade,"['Mãolee', 'Ferrugem', 'Mc Poze do Rodo', 'Xam..."
43528,Levanto Mis Manos,['Samuel Hernández']
105059,Binnen Zonder Kloppen,['De Dijk']
198386,Déjame vivir (feat. Lamari),"['Jarabe De Palo', 'Lamari']"
41014,2021,['FEDERALPRESSHA']
226942,Neighborhood #3 (Power Out),['Arcade Fire']
51430,Como Tú (Magic Music Box),['León Larregui']
42656,Rise Again,['Kamelot']


In [105]:
def recommender(song, model_knn, KNN_top_tracks_alpha, KNN_top_tracks_proc):
    
    
    distances, indices = model_knn.kneighbors(KNN_top_tracks_proc.loc[song].to_numpy().reshape(1,-1), 11)
        
    
    
    for i in indices:
        return (KNN_top_tracks_alpha[['name','artists']].loc[i].loc[KNN_top_tracks_alpha['name']!=song])

In [106]:
recommender(song, model_knn, KNN_top_tracks_alpha, KNN_top_tracks_proc)

  "X does not have valid feature names, but"


Unnamed: 0,name,artists
40402,Fell In Love With a Girl,['The White Stripes']
188267,Mano Lova,['Pikaso']
374831,Fica à Vontade,"['Mãolee', 'Ferrugem', 'Mc Poze do Rodo', 'Xam..."
43528,Levanto Mis Manos,['Samuel Hernández']
105059,Binnen Zonder Kloppen,['De Dijk']
198386,Déjame vivir (feat. Lamari),"['Jarabe De Palo', 'Lamari']"
41014,2021,['FEDERALPRESSHA']
226942,Neighborhood #3 (Power Out),['Arcade Fire']
51430,Como Tú (Magic Music Box),['León Larregui']
42656,Rise Again,['Kamelot']


In [93]:
KNN_top_tracks.head()

Unnamed: 0_level_0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
(What A) Wonderful World - Mono,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67,128787,0,['Sam Cooke'],1960-02-01,0.686,0.672,11,-5.523,1,0.0323,0.7,0.0,0.135,0.857,128.55
Summertime Blues,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64,119360,0,['Eddie Cochran'],1960-05-01,0.714,0.886,11,-8.629,0,0.0554,0.116,0.184,0.18,0.954,156.351
Breaking Up Is Hard to Do,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63,139200,0,['Neil Sedaka'],1960-12-30,0.743,0.799,8,-5.466,0,0.0375,0.699,0.0,0.0635,0.965,116.112
Georgia on My Mind - Original Master Recording,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61,217415,0,['Ray Charles'],1960-09-01,0.138,0.399,7,-8.756,1,0.0311,0.782,4e-06,0.188,0.296,179.93
Baby (You've Got What It Takes),0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60,165760,0,"['Dinah Washington', 'Brook Benton']",1960-07-05,0.67,0.596,3,-9.347,1,0.0627,0.852,0.00203,0.653,0.813,133.396


In [94]:
KNN_top_tracks.info()

<class 'pandas.core.frame.DataFrame'>
Index: 391661 entries, (What A) Wonderful World - Mono to A Day At A Time
Data columns (total 18 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   id                391661 non-null  object 
 1   name              391661 non-null  object 
 2   popularity        391661 non-null  int64  
 3   duration_ms       391661 non-null  int64  
 4   explicit          391661 non-null  int64  
 5   artists           391661 non-null  object 
 6   release_date      391661 non-null  object 
 7   danceability      391661 non-null  float64
 8   energy            391661 non-null  float64
 9   key               391661 non-null  int64  
 10  loudness          391661 non-null  float64
 11  mode              391661 non-null  int64  
 12  speechiness       391661 non-null  float64
 13  acousticness      391661 non-null  float64
 14  instrumentalness  391661 non-null  float64
 15  liveness          391661 non-null 

In [64]:
KNN_top_tracks_alpha.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,release_date,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,2G0GextMwZJLkNxcSZ7ZJ3,(What A) Wonderful World - Mono,67,128787,0,['Sam Cooke'],1960-02-01,0.686,0.672,11,-5.523,1,0.0323,0.7,0.0,0.135,0.857,128.55
1,3oAWTk92mZBxKBOKf8mR5v,Summertime Blues,64,119360,0,['Eddie Cochran'],1960-05-01,0.714,0.886,11,-8.629,0,0.0554,0.116,0.184,0.18,0.954,156.351
2,2x6pbpjVGjiWCcH89IK8AX,Breaking Up Is Hard to Do,63,139200,0,['Neil Sedaka'],1960-12-30,0.743,0.799,8,-5.466,0,0.0375,0.699,0.0,0.0635,0.965,116.112
3,47mA6f44zxLtdATOoY7GjN,Georgia on My Mind - Original Master Recording,61,217415,0,['Ray Charles'],1960-09-01,0.138,0.399,7,-8.756,1,0.0311,0.782,4e-06,0.188,0.296,179.93
4,0DICNd5XQ1og9UeYzxoNFV,Baby (You've Got What It Takes),60,165760,0,"['Dinah Washington', 'Brook Benton']",1960-07-05,0.67,0.596,3,-9.347,1,0.0627,0.852,0.00203,0.653,0.813,133.396


In [65]:
KNN_top_tracks_proc.head()

Unnamed: 0_level_0,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
(What A) Wonderful World - Mono,2.154349,128787.0,0.0,0.689451,-0.234264,-0.234264,0.879065,-0.234264,-0.234264,-0.234264,-0.234264,0.135,-0.234264,0.306697
Summertime Blues,1.977253,119360.0,0.0,0.86038,-0.234264,-0.234264,0.227086,-0.234264,-0.234264,-0.234264,-0.234264,0.18,-0.234264,1.253152
Breaking Up Is Hard to Do,1.918221,139200.0,0.0,1.037413,-0.234264,-0.234264,0.89103,-0.234264,-0.234264,-0.234264,-0.234264,0.0635,-0.234264,-0.116741
Georgia on My Mind - Original Master Recording,1.800158,217415.0,0.0,-2.655868,-0.234264,-0.234264,0.200428,-0.234264,-0.234264,-0.234264,-0.234264,0.188,-0.234264,2.055873
Baby (You've Got What It Takes),1.741126,165760.0,0.0,0.591778,-0.234264,-0.234264,0.076371,-0.234264,-0.234264,-0.234264,-0.234264,0.653,-0.234264,0.471674


In [66]:
#def recommender(df, song, n_neighbor=10):
 #   model_knn = NearestNeighbors(metric='cosine', algorithm = 'brute', n_neighbors = 11)
 #   model_knn.fit(KNN_top_tracks_proc)
    
 #   song = KNN_top_tracks.to_numpy()
    
  #  distances, indices = model_knn.kneighbors(song, 11)
    
  #  result = KNN_top_tracks.iloc[indices.flatten()]
 #   result["Distance"] = distance.flatten()
    
  #  return result
    
    
    


In [67]:
#df_result = recommender(KNN_top_tracks, "My Girl")

In [68]:
#df_result.head()