In [1]:
import pandas as pd
import os
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import load_dotenv
%load_ext dotenv
%dotenv


# Spotify API credentials are read from environment variables
# (presumably populated from a .env file by the %dotenv magic above — confirm).
client_id = os.environ['SPOTIFY_CLIENT_ID']
client_secret = os.environ['SPOTIFY_CLIENT_SECRET']

# Build the client-credentials manager first, then hand it to the API client.
credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
spotify = spotipy.Spotify(client_credentials_manager=credentials_manager)

## Load DataFrame

In [2]:
# Load the dataset from ../data/all.csv into a DataFrame.
df = pd.read_csv("../data/all.csv")

In [3]:
# List every column available in the loaded data.
df.columns

Index(['Age', 'Album_Name', 'Artist', 'Year', 'Description', 'Age Group',
       'Album_ID', 'Album_Name_Spotify', 'Artists_Spotify', 'Track_ID', 'ISRC',
       'Track_Name', 'Artists', 'popularity', 'preview_url', 'danceability',
       'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness',
       'instrumentalness', 'liveness', 'valence', 'tempo', 'type', 'uri',
       'track_href', 'analysis_url', 'duration_ms', 'time_signature',
       'lyrics'],
      dtype='object')

In [4]:
# Columns kept for the recommender: track identifiers first, then the
# numeric attributes.
columns = [
    'Track_ID', 'Track_Name', 'Artists', 'Age',
    'popularity', 'danceability', 'energy', 'key', 'loudness', 'mode',
    'speechiness', 'acousticness', 'instrumentalness', 'liveness',
    'valence', 'tempo', 'duration_ms', 'time_signature',
]

# The CSV is re-read here so this cell can be re-run on its own; the listed
# columns are then cast to pandas' nullable integer dtype.
df = (
    pd.read_csv("../data/all.csv")
    .loc[:, columns]
    .astype({
        'key': 'Int64',
        'mode': 'Int64',
        'duration_ms': 'Int64',
        'time_signature': 'Int64',
    })
)



In [5]:
# Preview the first three rows of the reduced frame.
df.head(3)

Unnamed: 0,Track_ID,Track_Name,Artists,Age,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,2FPQI1LRwWszttbRG8hknk,Games Monsters Play,"['Herry Monster', 'Grover']",2,6,0.738,0.544,7,-8.557,1,0.346,0.212,0.0,0.0937,0.961,144.448,204267,4
1,6pOoswwC1lNBI2TapMdaEW,Afraid of the Dark,['Telly Monster'],2,6,0.505,0.525,0,-10.897,1,0.109,0.355,0.0,0.1,0.444,127.922,141240,4
2,2EEwSq98rKwlRWT7sNCLRc,Eensy Weensy Spider,"['Count Von Count', 'The Sesame Street Kids']",2,10,0.875,0.338,0,-11.382,1,0.397,0.762,0.0,0.0992,0.962,116.027,94693,4


In [6]:
# Inspect the columns at positions 3..17, i.e. everything after the
# Track_ID / Track_Name / Artists fields.
df.columns[3:18]

Index(['Age', 'popularity', 'danceability', 'energy', 'key', 'loudness',
       'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'duration_ms', 'time_signature'],
      dtype='object')

## Recommend Songs by KNN

### Fit Model

In [7]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer

from sklearn.neighbors import NearestNeighbors

# Feature columns, selected by name so the pipeline does not silently pick
# the wrong columns if the column order of `df` ever changes. These are the
# columns found at positions 3..17 of `df`, in the same order.
FEATURE_COLUMNS = [
    'Age', 'popularity', 'danceability', 'energy', 'key', 'loudness',
    'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
    'valence', 'tempo', 'duration_ms', 'time_signature',
]

pipe = Pipeline([
    # Pass the feature columns through unchanged; all other columns are dropped.
    ('select features columns', ColumnTransformer([('select column', 'passthrough', FEATURE_COLUMNS)])),
    # NOTE(review): Normalizer rescales each *row* to unit norm, it does not
    # put the individual features on a common scale. Large-valued columns such
    # as duration_ms will likely dominate the distances — confirm this is
    # intended, or consider a per-feature scaler instead.
    ('normalize', Normalizer())
])

In [8]:
# Drop incomplete and duplicated rows, then renumber the index so that the
# labels displayed for `df` coincide with the positional indices of
# `features` (the lookups below go through `df.iloc` / `features[idx]`).
df = df.dropna().drop_duplicates().reset_index(drop=True)

# Build the feature matrix used by the nearest-neighbour model.
features = pipe.fit_transform(df)

In [9]:
# Nearest-neighbour index over the feature matrix; queries return 10
# neighbours unless told otherwise.
nn = NearestNeighbors(n_neighbors=10)
nn.fit(features)

### Example: Pick a song by its index and make recommendations.

In [10]:
idx = 2

# Look the title up with `idx` (not a literal row number) so the message
# always matches the song that is actually queried.
print("The song picked: ", df.iloc[idx]['Track_Name'])

# kneighbors expects a 2-D array, hence the reshape of the single feature row.
dists, indices = nn.kneighbors(features[idx].reshape(1, -1))

print("\nRecommendations: ")
# Display the five closest rows (the picked song itself appears first in the
# displayed output).
df.iloc[indices[0]][0:5]

The song picked:  Eensy Weensy Spider

Recommendations: 


Unnamed: 0,Track_ID,Track_Name,Artists,Age,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
2,2EEwSq98rKwlRWT7sNCLRc,Eensy Weensy Spider,"['Count Von Count', 'The Sesame Street Kids']",2,10,0.875,0.338,0,-11.382,1,0.397,0.762,0.0,0.0992,0.962,116.027,94693,4
1572,2gceTtVQDk9XgJoIv0Pnbe,Going On A Hunt (Hand Jive),['The Laurie Berkner Band'],2,11,0.64,0.384,4,-10.435,1,0.664,0.785,0.0,0.356,0.164,129.898,103613,4
3473,6l2pGDGFht56LhBKDB4GrI,A Song of One,['Tom Chapin'],4,10,0.687,0.32,0,-14.58,1,0.0456,0.276,0.0046,0.0793,0.611,181.271,150573,4
510,1vF1Fpou7IqNvmi3EQRS4t,The Shapes That Surround You,['Hap Palmer'],2,18,0.518,0.383,4,-12.235,1,0.0318,0.431,0.0,0.309,0.776,188.088,152760,3
1929,33pFqqDLFTTSfU9Qc22aj7,How Are We Going to Wake up Jeff?,['The Wiggles'],2,10,0.65,0.762,0,-5.401,1,0.299,0.165,0.0,0.295,0.92,98.681,79226,4


### Wrap it in a Python function

In [11]:
def make_recommendation(idx, num=5):
    """Return the songs closest to the song at position ``idx``.

    Parameters
    ----------
    idx : int
        Positional index of the query song in ``features`` (and therefore in
        ``df.iloc``).
    num : int, default 5
        Number of rows to return. Capped at 20 and at the number of rows in
        ``features``.

    Returns
    -------
    pandas.DataFrame
        Up to ``num`` rows of ``df`` in the order returned by
        ``nn.kneighbors``. The query song is part of the fitted data, so it
        is normally among the returned rows. An empty frame is returned when
        ``num`` is not positive.
    """
    # kneighbors raises if asked for more neighbours than there are samples.
    num = min(num, 20, features.shape[0])
    if num <= 0:
        return df.iloc[0:0]

    # Request exactly `num` neighbours: relying on the model's default
    # (n_neighbors=10) would silently truncate any request between 11 and 20.
    _, indices = nn.kneighbors(features[idx].reshape(1, -1), n_neighbors=num)
    return df.iloc[indices[0]]

In [12]:
# Recommend songs for the row at position 5, using the default count.
make_recommendation(5)

Unnamed: 0,Track_ID,Track_Name,Artists,Age,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
5,7jet0teuZs6qvKqCbyPHJz,Hey Little Worm,"['Oscar the Grouch', 'The Sesame Street Grouch...",2,8,0.62,0.651,0,-6.745,1,0.0926,0.677,0.0,0.357,0.706,126.047,104440,4
405,3j767S2jxAmw0hiJoZ7ddU,Ya Gotta Have Pep,['John Lithgow'],2,8,0.587,0.62,0,-6.432,1,0.224,0.373,0.0,0.0338,0.961,141.99,119347,4
617,3mPHZwe70cl7GR5vtyYWi4,Space Explorers,"[""Jack's Big Music Show Cast""]",2,7,0.57,0.695,0,-6.825,1,0.0312,0.025,0.0,0.0708,0.335,144.879,120773,4
2022,5fmH8Sn9f85UeBWnQxbbYx,"Easy Come, Easy Go",['Imagination Movers'],2,8,0.755,0.903,2,-4.54,1,0.0349,0.504,0.0,0.123,0.889,123.014,102240,4
3657,736FALfTOtXFwFSwM67qAC,My Aunt Came Back,['The Learning Station'],4,6,0.861,0.641,0,-6.443,1,0.0602,0.746,0.0,0.123,0.966,102.036,84000,4
