# Song Recommender

In [84]:
#Import required libraries
import spotipy
import pandas as pd
import json
from spotipy.oauth2 import SpotifyClientCredentials
from config import *
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
import pickle
import random

In [85]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id= Client_ID,
                                                           client_secret= Client_Secret))

In [86]:
#Read csv into notebook
song_clusters_df = pd.read_csv("../Data/songs_with_clusters.csv")

In [87]:
#Check df
song_clusters_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,id,uri,track_href,analysis_url,duration_ms,time_signature,title,artist,hot_100,cluster
0,0.707,0.681,0,-4.325,1,0.0668,0.0632,5e-06,0.0322,0.646,...,0yLdNVWF3Srea0uzk55zFn,spotify:track:0yLdNVWF3Srea0uzk55zFn,https://api.spotify.com/v1/tracks/0yLdNVWF3Sre...,https://api.spotify.com/v1/audio-analysis/0yLd...,200455,4,flowers,miley cyrus,1,2
1,0.347,0.378,8,-9.96,1,0.0354,0.857,0.0,0.118,0.61,...,5ReIs3aH4wfBjp93QOtWAl,spotify:track:5ReIs3aH4wfBjp93QOtWAl,https://api.spotify.com/v1/tracks/5ReIs3aH4wfB...,https://api.spotify.com/v1/audio-analysis/5ReI...,168609,4,kill bill,sza,1,1
2,0.517,0.675,6,-5.382,1,0.0357,0.459,0.0,0.151,0.518,...,59uQI0PADDKeE6UZDTJEe8,spotify:track:59uQI0PADDKeE6UZDTJEe8,https://api.spotify.com/v1/tracks/59uQI0PADDKe...,https://api.spotify.com/v1/audio-analysis/59uQ...,163855,4,last night,morgan wallen,1,0
3,0.714,0.472,2,-7.375,1,0.0864,0.013,5e-06,0.266,0.238,...,3nqQXoyQOWXiESFLlDF1hG,spotify:track:3nqQXoyQOWXiESFLlDF1hG,https://api.spotify.com/v1/tracks/3nqQXoyQOWXi...,https://api.spotify.com/v1/audio-analysis/3nqQ...,156943,4,unholy,sam smith & kim petras,1,7
4,0.728,0.293,1,-13.657,0,0.0562,0.479,0.904,0.118,0.152,...,4CpujwKsq4DxOIMiKr12m8,spotify:track:4CpujwKsq4DxOIMiKr12m8,https://api.spotify.com/v1/tracks/4CpujwKsq4Dx...,https://api.spotify.com/v1/audio-analysis/4Cpu...,217026,4,creepin',"metro boomin, the weeknd & 21 savage",1,3


In [88]:
song_clusters_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2489 entries, 0 to 2488
Data columns (total 22 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   danceability      2489 non-null   float64
 1   energy            2489 non-null   float64
 2   key               2489 non-null   int64  
 3   loudness          2489 non-null   float64
 4   mode              2489 non-null   int64  
 5   speechiness       2489 non-null   float64
 6   acousticness      2489 non-null   float64
 7   instrumentalness  2489 non-null   float64
 8   liveness          2489 non-null   float64
 9   valence           2489 non-null   float64
 10  tempo             2489 non-null   float64
 11  type              2489 non-null   object 
 12  id                2489 non-null   object 
 13  uri               2489 non-null   object 
 14  track_href        2489 non-null   object 
 15  analysis_url      2489 non-null   object 
 16  duration_ms       2489 non-null   int64  


In [89]:
#Drop duplicates from previously merged dataframe
song_clusters_df = song_clusters_df.drop_duplicates()

In [90]:
#Function to return song id
def search_song(title, artist, limit=1):
    try:
        result = sp.search(q=title + artist, limit=limit)
        return result["tracks"]["items"][0]["id"]
    except:
        return None

In [91]:
#Function that receives input of song title and artist from user
def read_song_and_artist():
    title_s = input("Please Enter the song title: ").lower()
    artist_s = input("Please Enter the artist title: ").lower()
    
    return artist_s,title_s

In [92]:
input_s = search_song(title_s,artist_s)

In [93]:
#sp.audio_features gets the audio features of the input from user in the form of a dictionary 
my_dict = sp.audio_features(input_s)
my_dict

[{'danceability': 0.637,
  'energy': 0.678,
  'key': 0,
  'loudness': -3.798,
  'mode': 1,
  'speechiness': 0.266,
  'acousticness': 0.209,
  'instrumentalness': 0,
  'liveness': 0.156,
  'valence': 0.254,
  'tempo': 84.039,
  'type': 'audio_features',
  'id': '561jH07mF1jHuk7KlaeF0s',
  'uri': 'spotify:track:561jH07mF1jHuk7KlaeF0s',
  'track_href': 'https://api.spotify.com/v1/tracks/561jH07mF1jHuk7KlaeF0s',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/561jH07mF1jHuk7KlaeF0s',
  'duration_ms': 250760,
  'time_signature': 4}]

In [94]:
#Change data type to a dataframe
input_df = pd.DataFrame(my_dict)

In [95]:
input_s = input_df.drop(['type','id','uri','track_href','analysis_url',
                                    'time_signature','key','mode', 'duration_ms'],axis=1)
input_s

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0.637,0.678,-3.798,0.266,0.209,0,0.156,0.254,84.039


In [96]:
#Function to load files
def load(filename = "../scalers/scaler.pickle"): 
    try: 
        with open(filename, "rb") as file: 
            return pickle.load(file) 
    except FileNotFoundError: 
        print("File ", filename, " not found!")
        return None

In [97]:
#Loading the scaler
scaler = load("../scalers/scaler.pickle")
scaler

StandardScaler()

In [98]:
#Loading best model
best_model = load("../models/Kmeans13.pickle")
best_model

KMeans(n_clusters=13, random_state=1234, verbose=1)

In [99]:
#Transform the new song 
song_scaled = scaler.transform(input_s)
song_scaled_df = pd.DataFrame(input_s, columns=input_s.columns)

In [100]:
#Predict the cluster of the transformed song using the best model
song_c = best_model.predict(song_scaled_df)
song_c

array([4], dtype=int32)

In [101]:
#Use the predicted cluster to recommend 5 songs 
cluster_n = song_clusters_df[song_clusters_df['cluster'] == song_c[0]] 
recommend = (cluster_n.iloc[:,18:20]).sample(5)
recommend

Unnamed: 0,title,artist
1683,new world slaughter,iconoclast
1684,fvneral moon,lorna shore
1785,dead presidents,rick ross
2004,nite time - ruby & tony radio edit,george acosta
2093,go down in history,four year strong


## Combine all previous steps into a while loop

In [103]:
done = False #Set this so the loop can be ended at a given point

#Load the scaler and model to be used
scaler = load("../scalers/scaler.pickle")
model  = load("../models/Kmeans13.pickle")
songs  = song_clusters_df

while not done:
    
    #Let the user input a song and artist using the function
    title,artist = read_song_and_artist()
    
    #Get the id of the song input
    id = search_song(title, artist)
    if not id: 
        print("! Unable to get song id")
        continue #Restarts the while loop if there is no song id
        
    #Get the audio features of the song input
    af = sp.audio_features(id)
    if not af:
        print("! Unable to get audio features")
        continue #Restarts the while loop if there are no song features
    
    #Scale the new audio features of the new song
    af_scaled_df = pd.DataFrame(af)
    af_scaled_df.drop(['type','id','uri','track_href','analysis_url',
                                    'time_signature','key','mode', 'duration_ms'], axis=1, inplace=True) #Only need feature columns
    af_scaled = scaler.transform(af_scaled_df) 
    af_scaled_df = pd.DataFrame(af_scaled, columns=af_scaled_df.columns) #Return back to a df
    
    #Use the model to predict the cluster of the new song
    cluster = model.predict(af_scaled_df)[0]
    
    #Check if the song is in the songs file and if it is a hot_100 song then recommend 5 hot_100 songs in the same cluster. 
    if not songs[(songs["id"] == id) & (songs["hot_100"] == 1)].empty:
        recommended = songs[(songs["hot_100"] == 1) & (songs["cluster"] == cluster)].sample(5)
    #If not recommend 5 songs from the not_100 list and the same cluster.
    else:
        recommended = songs[(songs["hot_100"] == 0) & (songs["cluster"] == cluster)].sample(5)
     
    #Return the recommended artists and song titles
    
  
    artist = recommended["artist"].values[0:5]
    title  = recommended["title"].values[0:5]
    print("Try these songs:")
    for i in range(5):
        print("  {} - {}".format(artist[i], title[i]))

   # print("Recommmended: {} - {}".format(artist, title))
    
    #Ask the user if they want more recommendations or not. 
    quit = input("Do you want more recommendations? (y|n) ")
    if quit == "n":
        done = True
        print("Bye!")

Please Enter the song title: mockingbird
Please Enter the artist title: eminem
! Unable to get song id
Please Enter the song title: flowers
Please Enter the artist title: miley cyrus
Try these songs:
  harry styles - as it was
  cole swindell - she had me at heads carolina
  corey kent - wild as her
  carly pearce - what he didn't do
  falling in reverse - watch the world burn
Do you want more recommendations? (y|n) n
Bye!
