In [1]:
import pandas as pd
import os
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import load_dotenv
%load_ext dotenv
%dotenv


# Spotify API credentials are read from environment variables; a missing
# variable raises KeyError here rather than failing later inside spotipy.
client_id = os.environ['SPOTIFY_CLIENT_ID']
client_secret = os.environ['SPOTIFY_CLIENT_SECRET']

# Spotify client authenticated through a client-credentials manager.
spotify = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)

## Load DataFrame

In [2]:
# Read the full song table from a CSV located relative to the notebook's working directory.
df = pd.read_csv("../data/all.csv")

In [3]:
# List every column available in the raw table.
df.columns

Index(['Age', 'Album_Name', 'Artist', 'Year', 'Description', 'Age Group',
       'Album_ID', 'Album_Name_Spotify', 'Artists_Spotify', 'Track_ID', 'ISRC',
       'Track_Name', 'Artists', 'popularity', 'preview_url', 'image_url',
       'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'type', 'uri', 'track_href', 'analysis_url', 'duration_ms',
       'time_signature', 'lyrics'],
      dtype='object')

In [4]:
# Keep the identifying/display fields (first five) followed by the numeric
# features; the feature columns therefore sit at positions 5..19.
columns = [
    'Track_ID', 'Track_Name', 'preview_url', 'image_url', 'Artists',
    'Age', 'popularity', 'danceability', 'energy', 'key', 'loudness',
    'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
    'valence', 'tempo', 'duration_ms', 'time_signature',
]

# Cast the integer-valued columns to pandas' nullable Int64 dtype.
df = df[columns].astype(
    dict.fromkeys(['key', 'mode', 'duration_ms', 'time_signature'], 'Int64')
)



In [5]:
# Preview the first three rows of the trimmed table.
df.head(3)

Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,2FPQI1LRwWszttbRG8hknk,Games Monsters Play,https://p.scdn.co/mp3-preview/33cc59cc1836954e...,https://i.scdn.co/image/ab67616d0000b273d61faa...,"['Herry Monster', 'Grover']",2,5,0.738,0.544,7,-8.557,1,0.346,0.212,0.0,0.0937,0.961,144.448,204267,4
1,6pOoswwC1lNBI2TapMdaEW,Afraid of the Dark,https://p.scdn.co/mp3-preview/cf340f0b536edadd...,https://i.scdn.co/image/ab67616d0000b273d61faa...,['Telly Monster'],2,5,0.505,0.525,0,-10.897,1,0.109,0.355,0.0,0.1,0.444,127.922,141240,4
2,2EEwSq98rKwlRWT7sNCLRc,Eensy Weensy Spider,https://p.scdn.co/mp3-preview/4cdc12aaeb7da4b7...,https://i.scdn.co/image/ab67616d0000b273d61faa...,"['Count Von Count', 'The Sesame Street Kids']",2,9,0.875,0.338,0,-11.382,1,0.397,0.762,0.0,0.0992,0.962,116.027,94693,4


In [6]:
# Columns at positions 5..19 — the same positional range the pipeline's slice(5, 20) selects.
df.columns[5:20]

Index(['Age', 'popularity', 'danceability', 'energy', 'key', 'loudness',
       'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'duration_ms', 'time_signature'],
      dtype='object')

## Recommend Songs by KNN

### Fit Model

In [9]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer

from sklearn.neighbors import NearestNeighbors

# Three-stage pipeline: select the feature columns, rescale, then index the
# result for nearest-neighbour queries.
model = Pipeline(steps=[
    # Columns at positions 5..19 pass through unchanged; all others are dropped
    # (ColumnTransformer's default remainder).
    (
        'select features columns',
        ColumnTransformer(transformers=[('select column', 'passthrough', slice(5, 20))]),
    ),
    # NOTE(review): Normalizer rescales each *row* to unit norm, not each column.
    # With duration_ms in the 10^5 range, it likely dominates every row vector —
    # confirm this is intended (a per-feature scaler may be what was meant).
    ('normalize', Normalizer()),
    ('knn', NearestNeighbors(n_neighbors=10)),
])

In [10]:
# Remove incomplete and duplicated rows, then renumber the index from 0 so that
# index labels coincide with the positional indices returned by the KNN step.
df = (
    df
    .dropna()
    .drop_duplicates()
    .reset_index(drop=True)
)

# Fit every pipeline step on the cleaned table (the fitted pipeline is displayed).
model.fit(df)

Pipeline(steps=[('select features columns',
                 ColumnTransformer(transformers=[('select column',
                                                  'passthrough',
                                                  slice(5, 20, None))])),
                ('normalize', Normalizer()),
                ('knn', NearestNeighbors(n_neighbors=10))])

In [15]:
# The third pipeline step (position 2) is the fitted NearestNeighbors estimator.
model[2]

NearestNeighbors(n_neighbors=10)

In [36]:
from joblib import dump, load
# Persistence is currently disabled. Uncomment the two lines below to write the
# fitted KNN step and the cleaned DataFrame to ../models/; a later cell loads
# both files from those paths, so they must exist before it runs.
#dump(model[2], '../models/knn.joblib')   # dump knn model
#dump(df, '../models/songs_df.joblib')


In [11]:
# Called without a query, kneighbors() returns the neighbours of every fitted
# sample, and a sample is not counted as its own neighbour.
distance, indices = model[2].kneighbors()

In [13]:
# Positional indices of the neighbours found for the first fitted row.
indices[0]

array([ 109, 1728, 1352,  432,  322,  882, 1615, 2640,  423, 1293],
      dtype=int64)

In [19]:
# Look up the neighbours of the second row in df and show the first three of them.
df.iloc[indices[1]][0:3]

Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
370,6yAArqwRhIx3aBZLh6fzyL,Everybody Dance,https://p.scdn.co/mp3-preview/51bd21a4ca87e14d...,https://i.scdn.co/image/ab67616d0000b273504563...,['Hap Palmer'],2,4,0.852,0.495,0,-11.383,1,0.045,0.205,9e-06,0.268,0.96,131.009,149773,4
954,378z6t1Ci06BLsrkaEORBV,Since You Asked,https://p.scdn.co/mp3-preview/f0a5c333d2bf49b5...,https://i.scdn.co/image/ab67616d0000b27398032c...,['Melissa Errico'],2,1,0.336,0.224,2,-13.677,1,0.0343,0.936,0.00103,0.17,0.199,176.052,193973,3
2062,08qNaFTXAWnE2bZt0uVItY,The Fine Friends Are Here,https://p.scdn.co/mp3-preview/640e68d4aaa54e02...,https://i.scdn.co/image/ab67616d0000b273bb8c48...,"['Dan Zanes', 'Friends']",3,6,0.541,0.598,2,-8.873,1,0.0342,0.361,0.00186,0.325,0.921,142.611,161973,4


### Example: Pick a song by its index and make recommendations

In [37]:
# Load the previously persisted KNN estimator and songs DataFrame from ../models/.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code — only load files from a trusted source.
knn = load('../models/knn.joblib')   # load the fitted knn model
df = load('../models/songs_df.joblib')


In [59]:
import random
# Draw 5 distinct integers from 0..11. No seed is set, so the displayed values
# change from run to run.
random.sample(range(12),5)

[4, 11, 9, 3, 1]

In [22]:
# Neighbours of every fitted sample from the loaded estimator (no query given,
# so a sample is not returned as its own neighbour).
distance, indices = knn.kneighbors()

In [23]:
# One row of neighbour positions per fitted sample.
indices

array([[  109,  1728,  1352, ...,  2640,   423,  1293],
       [  370,   954,  2062, ...,  1163,  2421,  2108],
       [ 2584,  2738,   368, ...,  2587,  3326,  1369],
       ...,
       [ 6664,  9148,  9149, ...,  3984,  4348,  4279],
       [10165,  4736, 10180, ...,  5079,  5442,  6921],
       [ 8013,  9044,  8881, ...,  5414, 10176,  6697]], dtype=int64)

In [25]:
# Seed song, chosen by its row position in df.
idx = 2
# Row `idx` of the neighbour matrix holds the positions of that song's neighbours.
recom_idx = indices[idx]

print("The song picked: ", df['Track_Name'].iloc[idx])
print("\nRecommendations: ")

# Show the five closest neighbours (rows come back nearest-first).
df.iloc[recom_idx].head(5)

The song picked:  Eensy Weensy Spider

Recommendations: 


Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
2584,6l2pGDGFht56LhBKDB4GrI,A Song of One,https://p.scdn.co/mp3-preview/ff739b27169d20fd...,https://i.scdn.co/image/ab67616d0000b273723deb...,['Tom Chapin'],4,11,0.687,0.32,0,-14.58,1,0.0456,0.276,0.0046,0.0793,0.611,181.271,150573,4
2738,736FALfTOtXFwFSwM67qAC,My Aunt Came Back,https://p.scdn.co/mp3-preview/0392adbe8ad3c749...,https://i.scdn.co/image/ab67616d0000b273f4264d...,['The Learning Station'],4,6,0.861,0.641,0,-6.443,1,0.0602,0.746,0.0,0.123,0.966,102.036,84000,4
368,1vF1Fpou7IqNvmi3EQRS4t,The Shapes That Surround You,https://p.scdn.co/mp3-preview/1916ea9fab5f8db8...,https://i.scdn.co/image/ab67616d0000b273504563...,['Hap Palmer'],2,18,0.518,0.383,4,-12.235,1,0.0318,0.431,0.0,0.309,0.776,188.088,152760,3
5,7jet0teuZs6qvKqCbyPHJz,Hey Little Worm,https://p.scdn.co/mp3-preview/ddcacfd61522b06b...,https://i.scdn.co/image/ab67616d0000b273d61faa...,"['Oscar the Grouch', 'The Sesame Street Grouch...",2,8,0.62,0.651,0,-6.745,1,0.0926,0.677,0.0,0.357,0.706,126.047,104440,4
281,21yqgjw9UTrcFW9IXVfqjI,Introduction to Olive Oil,https://p.scdn.co/mp3-preview/c6db9c79df1e4fef...,https://i.scdn.co/image/ab67616d0000b2730d5622...,['The Wiggles'],2,7,0.723,0.522,2,-9.512,1,0.934,0.824,0.0,0.271,0.6,88.909,69547,4


### Wrap it in a Python function

In [34]:
def make_recommendation(idx, num=5, max_num=20):
    """Return the songs most similar to the song at row position ``idx``.

    Uses the notebook-level ``knn`` estimator and ``df`` table. ``df`` is
    assumed to hold the rows ``knn`` was fitted on, in the same order.

    Parameters
    ----------
    idx : int
        Row position of the seed song in ``df``.
    num : int, default 5
        Number of recommendations wanted. It is capped at ``max_num`` and at
        the number of *other* songs available. Values <= 0 give an empty frame.
    max_num : int, default 20
        Upper bound applied to ``num``.

    Returns
    -------
    pandas.DataFrame
        The recommended rows of ``df``, nearest neighbour first.

    Raises
    ------
    IndexError
        If ``idx`` is out of range and at least one recommendation is requested.
    """
    # A sample is never its own neighbour, so at most len(df) - 1 can be returned.
    num = min(num, max_num, len(df) - 1)
    if num <= 0:
        # Avoids both kneighbors' rejection of n_neighbors <= 0 and the
        # accidental "all but the last k" slice a negative num used to produce.
        return df.iloc[0:0]

    # Ask for exactly `num` neighbours instead of relying on the estimator's
    # fitted default, so requests above that default are honoured up to the cap.
    # Without a query this still computes neighbours for every fitted sample;
    # only row `idx` is used.
    neighbours = knn.kneighbors(n_neighbors=num, return_distance=False)
    return df.iloc[neighbours[idx]]

In [35]:
# Recommendations for the song at row position 5, using the default count.
make_recommendation(5)

Unnamed: 0,Track_ID,Track_Name,preview_url,image_url,Artists,Age,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
462,3mPHZwe70cl7GR5vtyYWi4,Space Explorers,https://p.scdn.co/mp3-preview/300e8b622a2a5dc5...,https://i.scdn.co/image/ab67616d0000b273392a05...,"[""Jack's Big Music Show Cast""]",2,8,0.57,0.695,0,-6.825,1,0.0312,0.025,0.0,0.0708,0.335,144.879,120773,4
2738,736FALfTOtXFwFSwM67qAC,My Aunt Came Back,https://p.scdn.co/mp3-preview/0392adbe8ad3c749...,https://i.scdn.co/image/ab67616d0000b273f4264d...,['The Learning Station'],4,6,0.861,0.641,0,-6.443,1,0.0602,0.746,0.0,0.123,0.966,102.036,84000,4
2305,3qNMAtCLwFemyXVBRcV757,Here Comes a Camel,https://p.scdn.co/mp3-preview/096d3b12cc40a29d...,https://i.scdn.co/image/ab67616d0000b2731dbf5f...,['The Wiggles'],3,10,0.804,0.716,0,-5.732,1,0.0374,0.223,0.0,0.135,0.962,136.011,115507,4
2584,6l2pGDGFht56LhBKDB4GrI,A Song of One,https://p.scdn.co/mp3-preview/ff739b27169d20fd...,https://i.scdn.co/image/ab67616d0000b273723deb...,['Tom Chapin'],4,11,0.687,0.32,0,-14.58,1,0.0456,0.276,0.0046,0.0793,0.611,181.271,150573,4
1848,5c0ujfluikmVGEuylO8vgl,Bailamos,https://p.scdn.co/mp3-preview/7552a1218cc4ab05...,https://i.scdn.co/image/ab67616d0000b27354c6df...,['Mister G'],2,8,0.671,0.767,2,-7.636,0,0.0587,0.0239,0.00925,0.0874,0.811,169.901,137893,4
