# Music Recommender

### IMPORT LIBRARIES

In [2]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

from sklearn.metrics import silhouette_score, pairwise_distances_argmin_min
from yellowbrick.cluster import SilhouetteVisualizer
from kneed import KneeLocator

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import config

### LOAD DATASETS

In [3]:
# Top Songs
top_all = pd.read_csv('top_tracks.csv')

# Spotify Dataset
full_all = pd.read_csv('final_full_list2.csv')

In [4]:
top_all

Unnamed: 0,track_id,song_title,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms
0,59uQI0PADDKeE6UZDTJEe8,Last Night,Morgan Wallen,0.517,0.675,6,-5.382,1,0.0357,0.45900,0.000000,0.1510,0.518,203.853,59uQI0PADDKeE6UZDTJEe8,163855
1,1Lo0QY9cvc8sUB2vnIOxDT,Fast Car,Luke Combs,0.712,0.603,8,-5.520,1,0.0262,0.18600,0.000000,0.1150,0.670,97.994,1Lo0QY9cvc8sUB2vnIOxDT,265493
2,1s7oOCT8vauUh01PbJD6ps,Calm Down,Rema & Selena Gomez,0.799,0.802,11,-5.196,1,0.0371,0.42900,0.001280,0.1710,0.811,107.008,1s7oOCT8vauUh01PbJD6ps,239318
3,1s7oOCT8vauUh01PbJD6ps,Calm Down (with Selena Gomez),Rema,0.799,0.802,11,-5.196,1,0.0371,0.42900,0.001280,0.1710,0.811,107.008,1s7oOCT8vauUh01PbJD6ps,239318
4,0yLdNVWF3Srea0uzk55zFn,Flowers,Miley Cyrus,0.707,0.681,0,-4.325,1,0.0668,0.06320,0.000005,0.0322,0.646,117.999,0yLdNVWF3Srea0uzk55zFn,200455
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159,4lIxdJw6W3Fg4vUIYCB0S5,Style,Taylor Swift,0.598,0.786,2,-5.572,1,0.0383,0.00253,0.001600,0.1170,0.456,95.019,4lIxdJw6W3Fg4vUIYCB0S5,231000
160,5IAESfJjmOYu7cHyX557kz,Take Two,BTS,0.617,0.589,7,-5.978,1,0.0345,0.02680,0.000000,0.3780,0.566,92.991,5IAESfJjmOYu7cHyX557kz,229953
161,3JvKfv6T31zO0ini8iNItO,Another Love,Tom Odell,0.445,0.537,4,-8.532,0,0.0400,0.69500,0.000017,0.0944,0.131,122.769,3JvKfv6T31zO0ini8iNItO,244360
162,4FAKtPVycI4DxoOHC01YqD,Yandel 150,Yandel,0.783,0.729,6,-3.549,0,0.0691,0.04920,0.000272,0.1000,0.580,167.968,4FAKtPVycI4DxoOHC01YqD,216148


In [5]:
top_all.drop(["id", "duration_ms"], axis=1, inplace=True)

In [6]:
full_all.drop("duration_ms", axis=1, inplace=True)

In [7]:
full_all.columns = ['track_id', 'song_title', 'artist_name', 'danceability', 'energy',
       'key', 'loudness', 'mode', 'speechiness', 'acousticness',
       'instrumentalness', 'liveness', 'valence', 'tempo']

In [8]:
# add labels for filtering
top_all['group']= "t"
# add labels for filtering
full_all['group']= "f"

In [9]:
top_all

Unnamed: 0,track_id,song_title,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,group
0,59uQI0PADDKeE6UZDTJEe8,Last Night,Morgan Wallen,0.517,0.675,6,-5.382,1,0.0357,0.45900,0.000000,0.1510,0.518,203.853,t
1,1Lo0QY9cvc8sUB2vnIOxDT,Fast Car,Luke Combs,0.712,0.603,8,-5.520,1,0.0262,0.18600,0.000000,0.1150,0.670,97.994,t
2,1s7oOCT8vauUh01PbJD6ps,Calm Down,Rema & Selena Gomez,0.799,0.802,11,-5.196,1,0.0371,0.42900,0.001280,0.1710,0.811,107.008,t
3,1s7oOCT8vauUh01PbJD6ps,Calm Down (with Selena Gomez),Rema,0.799,0.802,11,-5.196,1,0.0371,0.42900,0.001280,0.1710,0.811,107.008,t
4,0yLdNVWF3Srea0uzk55zFn,Flowers,Miley Cyrus,0.707,0.681,0,-4.325,1,0.0668,0.06320,0.000005,0.0322,0.646,117.999,t
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159,4lIxdJw6W3Fg4vUIYCB0S5,Style,Taylor Swift,0.598,0.786,2,-5.572,1,0.0383,0.00253,0.001600,0.1170,0.456,95.019,t
160,5IAESfJjmOYu7cHyX557kz,Take Two,BTS,0.617,0.589,7,-5.978,1,0.0345,0.02680,0.000000,0.3780,0.566,92.991,t
161,3JvKfv6T31zO0ini8iNItO,Another Love,Tom Odell,0.445,0.537,4,-8.532,0,0.0400,0.69500,0.000017,0.0944,0.131,122.769,t
162,4FAKtPVycI4DxoOHC01YqD,Yandel 150,Yandel,0.783,0.729,6,-3.549,0,0.0691,0.04920,0.000272,0.1000,0.580,167.968,t


In [10]:
full_all

Unnamed: 0,track_id,song_title,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,group
0,0gNNToCW3qjabgTyBSjt3H,!Que Vida! - Mono Version,Love,0.600,0.540,9,-11.803,1,0.0328,0.52500,0.003050,0.1000,0.547,125.898,f
1,0tMgFpOrXZR6irEOLNWwJL,"""40""",U2,0.368,0.480,8,-11.605,1,0.0306,0.22800,0.707000,0.1590,0.338,150.166,f
2,2ZywW3VyVx6rrlrX75n3JB,"""40"" - Live",U2,0.272,0.684,8,-9.728,1,0.0505,0.09980,0.014500,0.9460,0.279,143.079,f
3,6DdWA7D1o5TU2kXWyCLcch,"""40"" - Remastered 2008",U2,0.371,0.545,8,-9.315,1,0.0307,0.18500,0.582000,0.1830,0.310,150.316,f
4,3vMmwsAiLDCfyc1jl76lQE,"""40"" - Remastered 2008",U2,0.371,0.545,8,-9.315,1,0.0307,0.18500,0.582000,0.1830,0.310,150.316,f
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174035,7JSH9jmLF2BfoZmYlQq5I0,Physical,Dua Lipa,0.643,0.813,0,-4.819,1,0.0492,0.01680,0.000344,0.1030,0.747,146.983,f
174036,4de1X6v99U7tfOXrNUCTbi,Candy,Doja Cat,0.689,0.516,8,-5.857,1,0.0444,0.51300,0.000000,0.1680,0.209,124.876,f
174037,1GVF9369j7InydwGztCDIZ,Good In Goodbye,Madison Beer,0.658,0.698,11,-5.950,0,0.1770,0.43300,0.000000,0.1740,0.456,139.054,f
174038,6qNB2ChCVPepl5ZjVJJTUW,STUPID (feat. Yung Baby Tate),Ashnikko,0.772,0.637,2,-6.881,1,0.1140,0.00459,0.000000,0.0778,0.540,149.906,f


In [11]:
# Joining 2 dfs together including group labels t for top list and f for full list
all = pd.concat([top_all, full_all])

In [12]:
all

Unnamed: 0,track_id,song_title,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,group
0,59uQI0PADDKeE6UZDTJEe8,Last Night,Morgan Wallen,0.517,0.675,6,-5.382,1,0.0357,0.45900,0.000000,0.1510,0.518,203.853,t
1,1Lo0QY9cvc8sUB2vnIOxDT,Fast Car,Luke Combs,0.712,0.603,8,-5.520,1,0.0262,0.18600,0.000000,0.1150,0.670,97.994,t
2,1s7oOCT8vauUh01PbJD6ps,Calm Down,Rema & Selena Gomez,0.799,0.802,11,-5.196,1,0.0371,0.42900,0.001280,0.1710,0.811,107.008,t
3,1s7oOCT8vauUh01PbJD6ps,Calm Down (with Selena Gomez),Rema,0.799,0.802,11,-5.196,1,0.0371,0.42900,0.001280,0.1710,0.811,107.008,t
4,0yLdNVWF3Srea0uzk55zFn,Flowers,Miley Cyrus,0.707,0.681,0,-4.325,1,0.0668,0.06320,0.000005,0.0322,0.646,117.999,t
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174035,7JSH9jmLF2BfoZmYlQq5I0,Physical,Dua Lipa,0.643,0.813,0,-4.819,1,0.0492,0.01680,0.000344,0.1030,0.747,146.983,f
174036,4de1X6v99U7tfOXrNUCTbi,Candy,Doja Cat,0.689,0.516,8,-5.857,1,0.0444,0.51300,0.000000,0.1680,0.209,124.876,f
174037,1GVF9369j7InydwGztCDIZ,Good In Goodbye,Madison Beer,0.658,0.698,11,-5.950,0,0.1770,0.43300,0.000000,0.1740,0.456,139.054,f
174038,6qNB2ChCVPepl5ZjVJJTUW,STUPID (feat. Yung Baby Tate),Ashnikko,0.772,0.637,2,-6.881,1,0.1140,0.00459,0.000000,0.0778,0.540,149.906,f


In [13]:
# remove track ids to find more duplicates
all_no_id = all.drop("track_id", axis=1)

In [14]:
# found same song at the top of the list. Duplicate can be removed right away.
all_no_id = all_no_id.drop(index=3).reset_index(drop=True)

In [15]:
all_no_id

Unnamed: 0,song_title,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,group
0,Last Night,Morgan Wallen,0.517,0.675,6,-5.382,1,0.0357,0.45900,0.000000,0.1510,0.518,203.853,t
1,Fast Car,Luke Combs,0.712,0.603,8,-5.520,1,0.0262,0.18600,0.000000,0.1150,0.670,97.994,t
2,Calm Down,Rema & Selena Gomez,0.799,0.802,11,-5.196,1,0.0371,0.42900,0.001280,0.1710,0.811,107.008,t
3,Flowers,Miley Cyrus,0.707,0.681,0,-4.325,1,0.0668,0.06320,0.000005,0.0322,0.646,117.999,t
4,All My Life,Lil Durk Featuring J. Cole,0.829,0.436,3,-8.205,1,0.3270,0.15000,0.000000,0.0954,0.693,143.031,t
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174197,Physical,Dua Lipa,0.643,0.813,0,-4.819,1,0.0492,0.01680,0.000344,0.1030,0.747,146.983,f
174198,Candy,Doja Cat,0.689,0.516,8,-5.857,1,0.0444,0.51300,0.000000,0.1680,0.209,124.876,f
174199,Good In Goodbye,Madison Beer,0.658,0.698,11,-5.950,0,0.1770,0.43300,0.000000,0.1740,0.456,139.054,f
174200,STUPID (feat. Yung Baby Tate),Ashnikko,0.772,0.637,2,-6.881,1,0.1140,0.00459,0.000000,0.0778,0.540,149.906,f


In [16]:
# checking for duplicates. Found 3914
all_no_id.duplicated().sum()

3914

In [17]:
# drop duplicates
song_list = all_no_id.drop_duplicates()

In [18]:
song_list

Unnamed: 0,song_title,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,group
0,Last Night,Morgan Wallen,0.517,0.675,6,-5.382,1,0.0357,0.45900,0.000000,0.1510,0.518,203.853,t
1,Fast Car,Luke Combs,0.712,0.603,8,-5.520,1,0.0262,0.18600,0.000000,0.1150,0.670,97.994,t
2,Calm Down,Rema & Selena Gomez,0.799,0.802,11,-5.196,1,0.0371,0.42900,0.001280,0.1710,0.811,107.008,t
3,Flowers,Miley Cyrus,0.707,0.681,0,-4.325,1,0.0668,0.06320,0.000005,0.0322,0.646,117.999,t
4,All My Life,Lil Durk Featuring J. Cole,0.829,0.436,3,-8.205,1,0.3270,0.15000,0.000000,0.0954,0.693,143.031,t
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174197,Physical,Dua Lipa,0.643,0.813,0,-4.819,1,0.0492,0.01680,0.000344,0.1030,0.747,146.983,f
174198,Candy,Doja Cat,0.689,0.516,8,-5.857,1,0.0444,0.51300,0.000000,0.1680,0.209,124.876,f
174199,Good In Goodbye,Madison Beer,0.658,0.698,11,-5.950,0,0.1770,0.43300,0.000000,0.1740,0.456,139.054,f
174200,STUPID (feat. Yung Baby Tate),Ashnikko,0.772,0.637,2,-6.881,1,0.1140,0.00459,0.000000,0.0778,0.540,149.906,f


### TRANSFORMATION

In [19]:
# get audio features
song_list_features = song_list.select_dtypes(np.number)

In [20]:
song_list_features

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0.517,0.675,6,-5.382,1,0.0357,0.45900,0.000000,0.1510,0.518,203.853
1,0.712,0.603,8,-5.520,1,0.0262,0.18600,0.000000,0.1150,0.670,97.994
2,0.799,0.802,11,-5.196,1,0.0371,0.42900,0.001280,0.1710,0.811,107.008
3,0.707,0.681,0,-4.325,1,0.0668,0.06320,0.000005,0.0322,0.646,117.999
4,0.829,0.436,3,-8.205,1,0.3270,0.15000,0.000000,0.0954,0.693,143.031
...,...,...,...,...,...,...,...,...,...,...,...
174197,0.643,0.813,0,-4.819,1,0.0492,0.01680,0.000344,0.1030,0.747,146.983
174198,0.689,0.516,8,-5.857,1,0.0444,0.51300,0.000000,0.1680,0.209,124.876
174199,0.658,0.698,11,-5.950,0,0.1770,0.43300,0.000000,0.1740,0.456,139.054
174200,0.772,0.637,2,-6.881,1,0.1140,0.00459,0.000000,0.0778,0.540,149.906


In [21]:
# scaling features
scaler = StandardScaler()
scaled_features = scaler.fit_transform(song_list_features)
scaled_features = pd.DataFrame(scaled_features, columns=song_list_features.columns)
scaled_features.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,-0.131078,0.685544,0.227286,1.053706,0.645564,-0.392084,-0.078722,-0.527896,-0.316878,-0.050361,2.830315
1,0.981581,0.416409,0.79584,1.029208,0.645564,-0.455326,-0.80419,-0.527896,-0.520225,0.529286,-0.622168
2,1.477998,1.160269,1.648672,1.086726,0.645564,-0.382764,-0.158444,-0.523783,-0.203907,1.066984,-0.328186
3,0.953051,0.707972,-1.478378,1.241351,0.645564,-0.185049,-1.130518,-0.527879,-0.987923,0.437762,0.030274
4,1.649176,-0.207836,-0.625546,0.552552,0.645564,1.547124,-0.899856,-0.527896,-0.630936,0.616995,0.846668


In [22]:
scaled_features.shape

(170288, 11)

In [23]:
pca = PCA(2) 
list_2D = pca.fit_transform(scaled_features)

> - From previous modeling, tuning and evaluation, we found the best k to be 7.
> - Also, applying PCA and KMeans model resulted in the best silhouette score.

### K-MEANS

In [24]:
model = KMeans(n_clusters=7, random_state=1, n_init=10)
clusters =  model.fit_predict(list_2D)

In [25]:
len(clusters)

170288

In [26]:
song_list.insert(14, "cluster", clusters)

In [27]:
song_list

Unnamed: 0,song_title,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,group,cluster
0,Last Night,Morgan Wallen,0.517,0.675,6,-5.382,1,0.0357,0.45900,0.000000,0.1510,0.518,203.853,t,0
1,Fast Car,Luke Combs,0.712,0.603,8,-5.520,1,0.0262,0.18600,0.000000,0.1150,0.670,97.994,t,2
2,Calm Down,Rema & Selena Gomez,0.799,0.802,11,-5.196,1,0.0371,0.42900,0.001280,0.1710,0.811,107.008,t,2
3,Flowers,Miley Cyrus,0.707,0.681,0,-4.325,1,0.0668,0.06320,0.000005,0.0322,0.646,117.999,t,2
4,All My Life,Lil Durk Featuring J. Cole,0.829,0.436,3,-8.205,1,0.3270,0.15000,0.000000,0.0954,0.693,143.031,t,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174197,Physical,Dua Lipa,0.643,0.813,0,-4.819,1,0.0492,0.01680,0.000344,0.1030,0.747,146.983,f,2
174198,Candy,Doja Cat,0.689,0.516,8,-5.857,1,0.0444,0.51300,0.000000,0.1680,0.209,124.876,f,6
174199,Good In Goodbye,Madison Beer,0.658,0.698,11,-5.950,0,0.1770,0.43300,0.000000,0.1740,0.456,139.054,f,2
174200,STUPID (feat. Yung Baby Tate),Ashnikko,0.772,0.637,2,-6.881,1,0.1140,0.00459,0.000000,0.0778,0.540,149.906,f,2


**Now that the data is prepared, time to create the music recommender!**

In [28]:
# connecting to spotipy api
client_credentials_manager = SpotifyClientCredentials(client_id=config.client_id, client_secret=config.client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager, requests_timeout=120)

In [29]:
def song_recommendation():
    ''' This function will:
    1. take the song title and artist input from the user 
    2. to search the spotipy api for its track id and audio features
    3. If the song is in the top_list df, take another song from the same df with the closest audio features
    4. If the song is NOT in the top_list, take a song from the full_list df to recommend the one with the closest audio features.'''
    
    # input search
    choice = input("Choose a song title and artist name you like :")
    
    # look for the audio features in the spotipy api
    search = sp.search(q=choice, type='track', limit=1)
    
    # get track id needed to get audio features
    try :
        track_id = search["tracks"]['items'][0]["id"]
        song_title = search['tracks']['items'][0]["name"]
        artist_name = search['tracks']['items'][0]["artists"][0]["name"]
    except (IndexError, TypeError):
        print("Sorry, we cannot locate this song. Please choose another.")
    
    # get audio features using the track id
    features = sp.audio_features(track_id)
    features_df = pd.DataFrame(features)
    audio_features = features_df[song_list_features.columns]
    
    # scale the features
    scaled = scaler.transform(audio_features)
    scaled_2D = pca.transform(scaled)
    
    # model
    cluster = model.predict(scaled_2D)
    
    # filter dataframe with same cluster
    filtered_list = song_list[song_list['cluster']==cluster[0]]
    filtered_top = filtered_list[filtered_list['group']== "t"]
    filtered_other = filtered_list[filtered_list['group']== "f"]
    
    # condition to choose song from top list or full list
    if track_id in list(top_all.track_id):
        closest, _ = pairwise_distances_argmin_min(scaled, filtered_top[song_list_features.columns])
    else:
        closest, _ = pairwise_distances_argmin_min(scaled, filtered_other[song_list_features.columns])
    
    # return recommendation
    print("Here's a song you might like!")
    return ' - '.join([filtered_list.iloc[closest[0]]['song_title'], filtered_list.iloc[closest[0]]['artist_name']])

### TRYING IT OUT

In [37]:
# Take Two - BTS (in the top list)
top_all[top_all.song_title == 'Take Two'] # recommended another song in the top list

Unnamed: 0,track_id,song_title,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,group
160,5IAESfJjmOYu7cHyX557kz,Take Two,BTS,0.617,0.589,7,-5.978,1,0.0345,0.0268,0.0,0.378,0.566,92.991,t


In [31]:
# Take Two - BTS (in the top list)
song_recommendation()

Here's a song you might like!




'Mourning - Post Malone'

In [38]:
# Mourning - Post Malone (in the top list)
top_all[top_all.song_title == 'Mourning'] # recommended another song in the top list

Unnamed: 0,track_id,song_title,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,group
75,0gWrMbx6pbdH3n3nsLjE55,Mourning,Post Malone,0.596,0.651,9,-4.604,1,0.0331,0.0971,0.0,0.279,0.255,73.959,t


In [39]:
# Now try a song that's not in the top list
top_all[top_all.song_title == 'Vampire'] # recommended another song in the top list

Unnamed: 0,track_id,song_title,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,group


In [40]:
# Vampire - Olivia Rodrigo
song_recommendation()

Here's a song you might like!




'Love Broke Thru - TobyMac'

In [41]:
# Love Broke Thru - TobyMac (NOT in the top list)
top_all[top_all.song_title == 'Love Broke Thru'] # recommended a song NOT in the top list

Unnamed: 0,track_id,song_title,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,group


In [43]:
full_all[full_all.song_title == 'Love Broke Thru']

Unnamed: 0,track_id,song_title,artist_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,group
83568,1NZWiuy0mlnsrcYL2dhKt6,Love Broke Thru,TobyMac,0.672,0.893,0,-3.984,1,0.0337,0.0101,0.0,0.0934,0.543,102.98,f
