## Load DataFrame

In [1]:
import pandas as pd
import os

# Load the song table from ../data/all.csv and list its columns.
# Column provenance:
#   'Artists_Spotify' is from the Spotify album info
#   'Artists'         is from the Spotify track info
df = pd.read_csv("../data/all.csv")
df.columns

Index(['Age', 'Album_Name', 'Artist', 'Year', 'Description', 'Age Group',
       'Album_ID', 'Album_Name_Spotify', 'Artists_Spotify', 'Track_ID', 'ISRC',
       'Track_Name', 'Artists', 'popularity', 'preview_url', 'image_url',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'type', 'uri', 'track_href', 'analysis_url', 'duration_ms',
       'time_signature', 'lyrics'],
      dtype='object')

In [8]:
# Columns kept for the recommender: track identifiers and display fields,
# the age label, popularity, the audio features, and the lyrics text.
select_columns = ['Track_ID', 'Track_Name', 'preview_url', 'image_url', 'Artists','Age', 'popularity',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms',
       'time_signature', 'lyrics']

# Select the columns and add `duration_min` in a single expression. `.assign`
# returns a new DataFrame, so the new column is never written onto a slice of
# the original frame (which would raise pandas' SettingWithCopyWarning).
df = df[select_columns].assign(
    duration_min=lambda frame: frame['duration_ms'] / 10**3 / 60  # ms -> s -> min
)

print(df.shape)
df.head(3)

(19560, 22)


Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
0,2FPQI1LRwWszttbRG8hknk,Games Monsters Play,https://p.scdn.co/mp3-preview/33cc59cc1836954e...,https://i.scdn.co/image/ab67616d0000b273d61faa...,"['Herry Monster', 'Grover']",2,5,0.738,0.544,7.0,...,0.346,0.212,0.0,0.0937,0.961,144.448,204267.0,4.0,,3.40445
1,6pOoswwC1lNBI2TapMdaEW,Afraid of the Dark,https://p.scdn.co/mp3-preview/cf340f0b536edadd...,https://i.scdn.co/image/ab67616d0000b273d61faa...,['Telly Monster'],2,5,0.505,0.525,0.0,...,0.109,0.355,0.0,0.1,0.444,127.922,141240.0,4.0,,2.354
2,2EEwSq98rKwlRWT7sNCLRc,Eensy Weensy Spider,https://p.scdn.co/mp3-preview/4cdc12aaeb7da4b7...,https://i.scdn.co/image/ab67616d0000b273d61faa...,"['Count Von Count', 'The Sesame Street Kids']",2,9,0.875,0.338,0.0,...,0.397,0.762,0.0,0.0992,0.962,116.027,94693.0,4.0,,1.578217


## Recommend Songs by KNN

### Fit Model

In [9]:
# Audio-related columns that must be present for a song to be usable.
feature_columns = ['key','mode', 'time_signature', 'duration_min','popularity',
                   'danceability', 'energy','loudness', 'speechiness',
                   'acousticness', 'instrumentalness', 'liveness', 'valence',
                   'tempo']

# 'Age' is also fed to the model as a numeric input, so rows missing it are
# dropped as well; a NaN in any model input would make the fit fail.
required_columns = feature_columns + ['Age']

# Keep only complete, unique rows and store the categorical codes as nullable
# integers. The original row labels are kept (the index is not reset), so
# positional results from the neighbour search must be looked up with
# `df_audio.iloc[...]`, not by label and not on `df`.
df_audio = (
    df.dropna(subset=required_columns)
      .drop_duplicates()
      .astype({'key': 'Int64', 'mode': 'Int64', 'time_signature': 'Int64'})
)

In [5]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import OneHotEncoder

from sklearn.neighbors import NearestNeighbors

# Columns treated as categories and expanded into one-hot indicator columns.
categorical_columns = ['key', 'mode', 'time_signature']

# Columns handed to the model unchanged as plain numbers.
numeric_columns = [
    'Age', 'duration_min', 'popularity', 'danceability', 'energy', 'loudness',
    'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence',
    'tempo',
]

# Step 1: build the feature matrix (one-hot categoricals + raw numerics).
features = ColumnTransformer(transformers=[
    ('categorical', OneHotEncoder(), categorical_columns),
    ('numeric', 'passthrough', numeric_columns),
])

# Steps 2-3: normalize each row, then index the rows for 10-nearest-neighbour lookup.
# NOTE(review): the numeric columns are on very different scales (e.g. tempo vs.
# danceability) and are not standardized before the row-wise Normalizer —
# confirm this weighting is intended.
model = Pipeline(steps=[
    ('features', features),
    ('normalize', Normalizer()),
    ('knn', NearestNeighbors(n_neighbors=10)),
])

In [38]:
# Fit every pipeline step on df_audio; the trailing `;` suppresses the cell output.
model.fit(df_audio);

In [13]:
# Inspect the fitted neighbour-search step (the third pipeline step, named 'knn').
model.named_steps['knn']

NearestNeighbors(n_neighbors=10)

In [36]:
from joblib import dump, load
# Persist the fitted neighbour-search step and the frame it was fitted on.
# Only the 'knn' step is saved, not the preprocessing steps of the pipeline.
dump(model.named_steps['knn'], '../models/knn_audio_features.joblib')
dump(df_audio, '../models/songs_df_audio_features.joblib');


In [16]:
# Called without a query, kneighbors() returns the neighbours of every fitted
# row (a row is not reported as its own neighbour). Row i of `indices` holds
# positions in the fitted data, i.e. positions in df_audio.
distance, indices = model[2].kneighbors()             

In [None]:
indices[0]    # indices[i] lists the positions of the nearest neighbours of item i

In [None]:
# `indices` holds row positions in df_audio (the frame the model was fitted on),
# so the lookup has to use df_audio; `df` still contains the rows that were
# dropped and would return the wrong songs.
df_audio.iloc[indices[1]][0:3]   # show the first three recommendations for song 1

### Example: pick a song by its index and make recommendations

In [37]:
knn = load('../models/knn_audio_features.joblib')   # load the saved knn model
# NOTE: `df` is rebound here to the saved frame the model was fitted on.
df = load('../models/songs_df_audio_features.joblib')


In [18]:
# Neighbours of every fitted row, taken from the reloaded model; row i of
# `indices` holds the positions of the neighbours of row i.
distance, indices = knn.kneighbors()

In [32]:
# Choose the song in this cell so it does not rely on a variable that is only
# defined in a later cell (which fails on Restart & Run All).
idx = 2
pd.DataFrame(df.iloc[idx]).T

Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
2,2EEwSq98rKwlRWT7sNCLRc,Eensy Weensy Spider,https://p.scdn.co/mp3-preview/4cdc12aaeb7da4b7...,https://i.scdn.co/image/ab67616d0000b273d61faa...,"['Count Von Count', 'The Sesame Street Kids']",2,9,0.875,0.338,0,...,0.397,0.762,0.0,0.0992,0.962,116.027,94693.0,4,,1.578217


In [35]:
# Position (not label) of the chosen song in df.
idx = 2

print("The song picked: ")
# Turn the selected row into a one-row table for display.
display(df.iloc[idx].to_frame().T)

# Positions of the neighbours of the chosen song.
recom_idx = indices[idx]

print("\nRecommendations: ")
df.iloc[recom_idx].head(5)

The song picked: 


Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
2,2EEwSq98rKwlRWT7sNCLRc,Eensy Weensy Spider,https://p.scdn.co/mp3-preview/4cdc12aaeb7da4b7...,https://i.scdn.co/image/ab67616d0000b273d61faa...,"['Count Von Count', 'The Sesame Street Kids']",2,9,0.875,0.338,0,...,0.397,0.762,0.0,0.0992,0.962,116.027,94693.0,4,,1.578217



Recommendations: 


Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
968,616Uq1Ft4Rl56FbH1H5XXH,Lady Bug Picnic,https://p.scdn.co/mp3-preview/773afcdea6634010...,https://i.scdn.co/image/ab67616d0000b27338381e...,['Victor Johnson'],2,10,0.723,0.42,0,...,0.0908,0.809,3.5e-05,0.0996,0.914,120.471,114573.0,4,,1.90955
652,2p62Pt8bkKLxpMuWiMC9oQ,I'm A Little Teapot,,https://i.scdn.co/image/ab67616d0000b2736b1fd9...,['Music For Little People Choir'],2,9,0.73,0.334,0,...,0.156,0.78,0.0,0.675,0.773,124.561,182547.0,4,,3.04245
657,5GyyMAq40AOBOkhtTc2IM3,Mary Had A Little Lamb,,https://i.scdn.co/image/ab67616d0000b2736b1fd9...,['Music For Little People Choir'],2,10,0.708,0.358,9,...,0.0433,0.482,0.0,0.0857,0.69,140.069,164827.0,4,,2.747117
2683,3Bob4IPl4maAnBPiTZbuxK,Siyahamba,https://p.scdn.co/mp3-preview/702f6bf219f80c11...,https://i.scdn.co/image/ab67616d0000b273cb6801...,"['Dan Zanes', 'Friends']",3,11,0.499,0.338,0,...,0.0308,0.802,7e-06,0.0996,0.559,135.923,179840.0,4,,2.997333
1470,0YawYQdvhpuWNSMXqY51xm,A Cat Had a Birthday,https://p.scdn.co/mp3-preview/74d99be6214ed824...,https://i.scdn.co/image/ab67616d0000b2738cc725...,"[""Sesame Street's David""]",2,9,0.837,0.458,5,...,0.172,0.355,0.0,0.117,0.919,123.939,164507.0,4,,2.741783


### Write it as a Python class

In [63]:
class SongRecommender():
    """Recommend similar songs from a previously saved nearest-neighbour model.

    Attributes are filled by `load_data`:
      df       -- DataFrame of songs, loaded from the saved joblib file
      model    -- the saved neighbour-search model
      distance -- distances returned by `model.kneighbors()`
      indices  -- neighbour positions returned by `model.kneighbors()`;
                  row i holds the positions (for `df.iloc`) of the
                  neighbours of the song at position i
    """

    # Upper bound on the number of recommendations a single call may return.
    MAX_RECOMMENDATIONS = 20

    def __init__(self):
        self.df = None
        self.model = None
        self.distance = None
        self.indices = None

    def load_data(self, option='audio'):
        """Load the saved songs frame and model and precompute all neighbours.

        Parameters
        ----------
        option : str
            Which saved model to load. Only 'audio' is supported.

        Raises
        ------
        ValueError
            If `option` is not a supported value (previously this was silently
            ignored and left the recommender unloaded).
        """
        if option != 'audio':
            raise ValueError(
                "Unsupported option {!r}; expected 'audio'.".format(option)
            )

        self.df = load('../models/songs_df_audio_features.joblib')
        self.model = load('../models/knn_audio_features.joblib')
        # No query is passed, so neighbours are computed for every fitted row.
        self.distance, self.indices = self.model.kneighbors()

    def make_recommendation(self, idx, num=5):
        """Display the selected song and return its nearest neighbours.

        Parameters
        ----------
        idx : int
            Position of the selected song in `self.df` (used with `.iloc`).
        num : int
            Number of recommendations wanted. Clamped to the range
            0..MAX_RECOMMENDATIONS; the result never has more rows than the
            number of neighbours stored per song.

        Returns
        -------
        DataFrame with up to `num` rows of `self.df`.

        Raises
        ------
        RuntimeError
            If `load_data` has not been called yet.
        """
        if self.df is None or self.indices is None:
            raise RuntimeError("No data loaded; call load_data() first.")

        # Clamp at 0 as well: a negative value would otherwise slice from the
        # end and return "all but the last |num|" neighbours.
        num = max(0, min(num, self.MAX_RECOMMENDATIONS))

        print ("Song Selected: ")
        display(self.df.iloc[[idx]])
        return self.df.iloc[self.indices[idx]][0:num]

        
        

In [64]:
# Create an empty recommender; nothing is loaded until load_data() is called.
recom = SongRecommender()

In [65]:
# Load the saved songs frame and model (default option 'audio') and precompute neighbours.
recom.load_data()


In [66]:
# Recommend songs for the song at position 2 (default: 5 recommendations).
recom.make_recommendation(2)

Song Selected: 


Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
2,2EEwSq98rKwlRWT7sNCLRc,Eensy Weensy Spider,https://p.scdn.co/mp3-preview/4cdc12aaeb7da4b7...,https://i.scdn.co/image/ab67616d0000b273d61faa...,"['Count Von Count', 'The Sesame Street Kids']",2,9,0.875,0.338,0,...,0.397,0.762,0.0,0.0992,0.962,116.027,94693.0,4,,1.578217


Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
968,616Uq1Ft4Rl56FbH1H5XXH,Lady Bug Picnic,https://p.scdn.co/mp3-preview/773afcdea6634010...,https://i.scdn.co/image/ab67616d0000b27338381e...,['Victor Johnson'],2,10,0.723,0.42,0,...,0.0908,0.809,3.5e-05,0.0996,0.914,120.471,114573.0,4,,1.90955
652,2p62Pt8bkKLxpMuWiMC9oQ,I'm A Little Teapot,,https://i.scdn.co/image/ab67616d0000b2736b1fd9...,['Music For Little People Choir'],2,9,0.73,0.334,0,...,0.156,0.78,0.0,0.675,0.773,124.561,182547.0,4,,3.04245
657,5GyyMAq40AOBOkhtTc2IM3,Mary Had A Little Lamb,,https://i.scdn.co/image/ab67616d0000b2736b1fd9...,['Music For Little People Choir'],2,10,0.708,0.358,9,...,0.0433,0.482,0.0,0.0857,0.69,140.069,164827.0,4,,2.747117
2683,3Bob4IPl4maAnBPiTZbuxK,Siyahamba,https://p.scdn.co/mp3-preview/702f6bf219f80c11...,https://i.scdn.co/image/ab67616d0000b273cb6801...,"['Dan Zanes', 'Friends']",3,11,0.499,0.338,0,...,0.0308,0.802,7e-06,0.0996,0.559,135.923,179840.0,4,,2.997333
1470,0YawYQdvhpuWNSMXqY51xm,A Cat Had a Birthday,https://p.scdn.co/mp3-preview/74d99be6214ed824...,https://i.scdn.co/image/ab67616d0000b2738cc725...,"[""Sesame Street's David""]",2,9,0.837,0.458,5,...,0.172,0.355,0.0,0.117,0.919,123.939,164507.0,4,,2.741783


In [22]:
def make_recommendation(idx, num=5):
    """Return up to `num` nearest-neighbour songs for the song at position `idx`.

    Uses the module-level `knn` model and `df` frame.

    Parameters
    ----------
    idx : int
        Position of the selected song in `df` (used with `.iloc`).
    num : int
        Number of recommendations wanted, clamped to the range 0..20. The
        result never has more rows than the number of neighbours the model
        returns per song.

    Returns
    -------
    DataFrame with up to `num` rows of `df`.
    """
    # Only the neighbour positions are needed; the distances are discarded.
    _, indices = knn.kneighbors()
    # Clamp at 0 as well: a negative value would otherwise slice from the end.
    num = max(0, min(num, 20))
    return df.iloc[indices[idx]][0:num]

In [23]:
# Recommend for the song at position 5; the ID and URL columns are dropped from the displayed table.
make_recommendation(5).drop(columns=['Track_ID','preview_url','image_url'])

Unnamed: 0,Track_Name,Artists,Age,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,lyrics,duration_min
1477,Do De Rubber Duck,"['Ernie', 'Kermit the Frog', 'Oscar the Grouch...",2,10,0.801,0.595,0,-8.096,1,0.379,0.18,0.0,0.309,0.89,153.626,174547.0,4,Ernie: There's a brand new dance\r\nAnd it's g...,2.909117
1937,High up on the Trapeze,['The Wiggles'],2,8,0.648,0.553,0,-6.318,1,0.0277,0.658,0.000379,0.29,0.811,112.0,106440.0,4,,1.774
405,Ya Gotta Have Pep,['John Lithgow'],2,9,0.587,0.62,0,-6.432,1,0.224,0.373,0.0,0.0338,0.961,141.99,119347.0,4,You gotta have pep\r\nYou gotta have poop and ...,1.989117
2762,Choo Choo Ch'Boogie,"['Dan Zanes', 'Friends', 'Rankin Don aka Fathe...",3,10,0.645,0.612,7,-8.672,1,0.376,0.788,0.0,0.214,0.854,160.585,162307.0,4,,2.705117
681,The Word is No,"[""Sesame Street's Gina"", ""Sesame Street's Maria""]",2,8,0.929,0.762,7,-6.55,1,0.033,0.422,0.000112,0.154,0.971,125.591,116000.0,4,No parking\r\nNo biking\r\nNo swimming\r\nNo h...,1.933333
