## Load DataFrame

In [1]:
import pandas as pd
import os

df = pd.read_csv("../data/all.csv")
df.columns                                # 'Artists_Spotify' is from spotify album info
                                          # 'Artists'  is from spotify track info

Index(['Age', 'Album_Name', 'Artist', 'Year', 'Description', 'Age Group',
       'Album_ID', 'Album_Name_Spotify', 'Artists_Spotify', 'Track_ID', 'ISRC',
       'Track_Name', 'Artists', 'popularity', 'preview_url', 'image_url',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'type', 'uri', 'track_href', 'analysis_url', 'duration_ms',
       'time_signature', 'lyrics'],
      dtype='object')

In [2]:
select_columns = ['Track_ID', 'Track_Name', 'preview_url', 'image_url', 'Artists','Age', 'popularity',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms',
       'time_signature', 'lyrics']

df = df[select_columns]

df['duration_min'] = df['duration_ms']/10**3/60

print (df.shape)
df.head(3)

(19560, 22)


Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
0,2FPQI1LRwWszttbRG8hknk,Games Monsters Play,https://p.scdn.co/mp3-preview/33cc59cc1836954e...,https://i.scdn.co/image/ab67616d0000b273d61faa...,"['Herry Monster', 'Grover']",2,5,0.738,0.544,7.0,...,0.346,0.212,0.0,0.0937,0.961,144.448,204267.0,4.0,,3.40445
1,6pOoswwC1lNBI2TapMdaEW,Afraid of the Dark,https://p.scdn.co/mp3-preview/cf340f0b536edadd...,https://i.scdn.co/image/ab67616d0000b273d61faa...,['Telly Monster'],2,5,0.505,0.525,0.0,...,0.109,0.355,0.0,0.1,0.444,127.922,141240.0,4.0,,2.354
2,2EEwSq98rKwlRWT7sNCLRc,Eensy Weensy Spider,https://p.scdn.co/mp3-preview/4cdc12aaeb7da4b7...,https://i.scdn.co/image/ab67616d0000b273d61faa...,"['Count Von Count', 'The Sesame Street Kids']",2,9,0.875,0.338,0.0,...,0.397,0.762,0.0,0.0992,0.962,116.027,94693.0,4.0,,1.578217


## Recommend Songs by KNN

### Fit Model

In [3]:
feature_columns = ['key','mode', 'time_signature', 'duration_min','popularity', 
                   'danceability', 'energy','loudness', 'speechiness',
                   'acousticness', 'instrumentalness', 'liveness', 'valence', 
                   'tempo']
df_audio = df.dropna(subset=feature_columns)
df_audio = df_audio.drop_duplicates() #.reset_index(drop=True)
df_audio = df_audio.astype({'key': 'Int64', 'mode':'Int64', 'time_signature':'Int64'})

In [4]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

from sklearn.neighbors import NearestNeighbors

categorical_columns = ['key','mode', 'time_signature']

numeric_columns = ['Age','duration_min','popularity', 'danceability', 'energy','loudness', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']

features = ColumnTransformer([
    ('categorical', OneHotEncoder(), categorical_columns),
    ('numeric', 'passthrough', numeric_columns)
])


model = Pipeline([
    ('features', features),
    ('normalize', StandardScaler()),
    ('knn', NearestNeighbors(n_neighbors=10))
])

In [5]:
model.fit(df_audio);

In [6]:
model[2]

NearestNeighbors(n_neighbors=10)

In [7]:
from joblib import dump, load
dump(model[2], '../models/knn_audio_features.joblib')   # dump knn model
dump(df_audio, '../models/songs_df_audio_features.joblib');


In [8]:
distance, indices = model[2].kneighbors()             

In [9]:
indices[0]    # indices[i] is the list of the nearest kneightbors for item-i

array([ 109, 2800, 2246, 2681,  709, 1830, 4021, 3661,  733,  705],
      dtype=int64)

In [10]:
df.iloc[indices[1]][0:3]   # show the first three recommendation for song-1

Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
2192,1n1AvAiUiOsWsFofNfdkfi,Wimoweh,https://p.scdn.co/mp3-preview/e32bee9384356aba...,https://i.scdn.co/image/ab67616d0000b273e4007e...,['Joanie Leeds and the Nightlights'],2,1,0.657,0.398,4.0,...,0.048,0.538,0.0,0.299,0.467,120.327,189467.0,4.0,,3.157783
1841,7b5OsAZ4FTmA0fyZwel0ar,Moon Moon Moon - Alternate Version,,https://i.scdn.co/image/ab67616d0000b273d13fb8...,['The Laurie Berkner Band'],2,25,0.585,0.0778,3.0,...,0.0673,0.894,0.0,0.126,0.314,95.38,110147.0,4.0,,1.835783
675,1GbA2OeSw27WxiHArYnRYD,Somebody Come and Play,https://p.scdn.co/mp3-preview/64f80bd2da135a2a...,https://i.scdn.co/image/ab67616d0000b273f8bae9...,"['Big Bird', 'The Sesame Street Kids']",2,19,0.739,0.315,9.0,...,0.0547,0.553,0.0,0.247,0.766,140.561,143827.0,4.0,Somebody come and play\r\nSomebody come and pl...,2.397117


### Example: Pick up a song using index, and make recommendations.

In [11]:
knn = load('../models/knn_audio_features.joblib')   # dump knn model
df = load('../models/songs_df_audio_features.joblib')


In [12]:
distance, indices = knn.kneighbors()

In [14]:
idx = 2
pd.DataFrame(df.iloc[idx]).T

Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
2,2EEwSq98rKwlRWT7sNCLRc,Eensy Weensy Spider,https://p.scdn.co/mp3-preview/4cdc12aaeb7da4b7...,https://i.scdn.co/image/ab67616d0000b273d61faa...,"['Count Von Count', 'The Sesame Street Kids']",2,9,0.875,0.338,0,...,0.397,0.762,0.0,0.0992,0.962,116.027,94693.0,4,,1.578217


In [15]:
idx = 2         

print ("The song picked: ")
display(pd.DataFrame(df.iloc[idx]).T)

recom_idx = indices[idx]

print ("\nRecommendations: ")
df.iloc[recom_idx][0:5]

The song picked: 


Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
2,2EEwSq98rKwlRWT7sNCLRc,Eensy Weensy Spider,https://p.scdn.co/mp3-preview/4cdc12aaeb7da4b7...,https://i.scdn.co/image/ab67616d0000b273d61faa...,"['Count Von Count', 'The Sesame Street Kids']",2,9,0.875,0.338,0,...,0.397,0.762,0.0,0.0992,0.962,116.027,94693.0,4,,1.578217



Recommendations: 


Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
1804,4I7NWDIY2OHJkboYZuCe4c,I Chew,https://p.scdn.co/mp3-preview/30d2229bcff8c11a...,https://i.scdn.co/image/ab67616d0000b273c6e4b2...,['Hullabaloo'],2,3,0.748,0.467,0,...,0.299,0.792,0.0,0.181,0.963,114.937,69000.0,4,,1.15
776,1OwaPPFzjieVqN1y3fwmVs,Indoor-Outdoor Voices,https://p.scdn.co/mp3-preview/da1c52f108beaa63...,https://i.scdn.co/image/ab67616d0000b27312cd46...,['Barney'],2,18,0.867,0.484,0,...,0.331,0.758,0.0,0.0616,0.853,145.691,101133.0,4,,1.68555
668,36tMNdeGoMmkhXpUcweAvC,If You're Happy And You Know It,,https://i.scdn.co/image/ab67616d0000b2736b1fd9...,['Music For Little People Choir'],2,9,0.835,0.387,0,...,0.36,0.466,0.0,0.184,0.875,142.259,93960.0,4,,1.566
745,5rVx5Zo0wrhoK4K9hjZgaT,This Happy House,,https://i.scdn.co/image/ab67616d0000b2734a71aa...,['Jessica Harper'],2,1,0.774,0.486,0,...,0.426,0.567,0.0,0.0513,0.971,76.118,155693.0,4,,2.594883
741,2EUjk7qTInt05cu2V5fVGi,My Baby Is A Genius,,https://i.scdn.co/image/ab67616d0000b2734a71aa...,['Jessica Harper'],2,1,0.816,0.396,0,...,0.463,0.615,0.0,0.113,0.649,139.999,172160.0,4,,2.869333


### Write the Recommender as a python class

In [16]:
class SongRecommender():
    def __init__(self):
        self.df = None
        self.model = None
        self.distance = None
        self.indices = None
        
    def load_data(self, option='audio'):
        if option == 'audio':
            self.df = load('../models/songs_df_audio_features.joblib')
            self.model = load('../models/knn_audio_features.joblib')  
            knn = self.model
            self.distance, self.indices = knn.kneighbors()
    
    def make_recommendation(self, idx, num=5):
        num = min(num, 20)
        
        print ("Song Selected: ")
        display(self.df.iloc[[idx]])
        return self.df.iloc[self.indices[idx]][0:num]

        
        

In [17]:
recom = SongRecommender()

In [18]:
recom.load_data()


In [21]:
recom.make_recommendation(3)

Song Selected: 


Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
3,0FG1XPQfLWKzJbayikwucU,Be Kind to Your Neighborhood Monsters,https://p.scdn.co/mp3-preview/9e0ab5d8ccc7d5cb...,https://i.scdn.co/image/ab67616d0000b273d61faa...,['Grover'],2,5,0.593,0.508,10,...,0.335,0.702,0.0,0.675,0.673,127.663,172200.0,4,,2.87


Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
114,3YG9DzaY0Tmr5o9WdpDofO,Be Kind to Your Neighborhood Monsters,https://p.scdn.co/mp3-preview/9e0ab5d8ccc7d5cb...,https://i.scdn.co/image/ab67616d0000b2732d27e5...,['Grover'],2,5,0.593,0.508,10,...,0.335,0.702,0.0,0.675,0.673,127.663,172200.0,4,,2.87
3130,1z8039cpLZ8bMoKSCpoGZr,Hey Manny,,https://i.scdn.co/image/ab67616d0000b2738e7b5f...,['Manny and the Tools'],3,9,0.61,0.668,10,...,0.0713,0.705,0.0,0.567,0.896,136.669,105960.0,4,,1.766
612,1GfgRhQYFserXZ45gCByLq,Me Dance,https://p.scdn.co/mp3-preview/16624a9566bd312b...,https://i.scdn.co/image/ab67616d0000b273392a05...,['Yolanda Adams'],2,8,0.774,0.918,10,...,0.228,0.538,0.0,0.702,0.785,129.038,113027.0,4,,1.883783
879,4ELuHk1S0xpMIgGVOsX8me,Fuzzy and Blue (And Orange),https://p.scdn.co/mp3-preview/dc987d74b9a3b339...,https://i.scdn.co/image/ab67616d0000b273256d67...,"['Grover', 'Cookie Monster', 'Herry Monster', ...",2,39,0.561,0.594,10,...,0.412,0.523,0.0,0.347,0.585,137.665,153040.0,4,,2.550667
6564,0VRJtAPuTBrYTfAjVp7HD9,Whispering Bells,,https://i.scdn.co/image/ab67616d0000b27320c7ae...,['The Del-Vikings'],8,0,0.532,0.648,10,...,0.076,0.74,0.000454,0.487,0.644,127.193,146173.0,4,Whispering bells\r\nBeen so long\r\nWhispering...,2.436217
