## Spotify song recommender

In [8]:
import numpy as np
import pandas as pd
import pickle
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from matplotlib import pyplot
from sklearn.metrics import silhouette_score
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import config
from IPython.display import IFrame

In [9]:
# Spotify client, authenticated with the client-credentials flow.
# The id/secret are read from the local `config` module rather than being written here.

sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=config.client_id,
        client_secret=config.client_secret,
    )
)

In [10]:
# Function to load my previously saved model.

def load(filename = "filename.pickle"):
    """Unpickle and return the object stored in `filename`.

    If the file does not exist, "File not found!" is printed and None is
    returned, so callers should check the result before using it.

    NOTE(review): pickle.load can execute arbitrary code; only use this on
    pickle files you created yourself.
    """
    try:
        with open(filename, "rb") as handle:
            unpickled = pickle.load(handle)
    except FileNotFoundError:
        print("File not found!")
        return None
    return unpickled

In [11]:
# Function for song recommendations.

def _spotify_embed(track_id):
    """Build the embedded Spotify player IFrame for one track id."""
    return IFrame(src="https://open.spotify.com/embed/track/" + track_id,
                  width="320",
                  height="80",
                  frameborder="0",
                  allowtransparency="true",
                  allow="encrypted-media",
                  )


def song_recommender():
    """Ask the user for a song title and recommend another song.

    If the (title-cased) input is found anywhere in `hot_songs`, a random row
    of `hot_songs` is printed and None is returned.

    Otherwise the title is searched on Spotify, the audio features of the top
    match are scaled with the pickled scaler, assigned to a cluster by the
    pickled KMeans model, and a random song from the same cluster of
    `spotify_songs` is suggested.

    Returns:
        An IFrame with the embedded player of the suggested song, or None when
        the song is in the hot list, when nothing could be found, or when a
        pickle file is missing.

    Reads the notebook globals `sp`, `hot_songs`, `spotify_songs` and `load`.
    """
    # Local import: the notebook's import cell only brings in IFrame.
    from IPython.display import display

    # Features fed to the scaler / KMeans, in the order used for the scaled frame.
    feature_columns = ["danceability", "energy", "loudness", "speechiness", "acousticness",
                       "instrumentalness", "liveness", "valence", "tempo", "duration_ms"]

    song_input = input("Please enter a name of a song: ").title()

    if song_input in hot_songs.values:
        print()
        print("Your song is in the hot songs list!")
        print("Here is another hot song you could listen: ")
        print()
        print(hot_songs.sample())
        return None

    search_result = sp.search(q = song_input, limit = 1, market = "GB")

    # An unknown title yields an empty list; indexing [0] would raise IndexError.
    found_tracks = search_result["tracks"]["items"]
    if not found_tracks:
        print("Sorry, I could not find that song on Spotify.")
        return None

    song_id = found_tracks[0]["id"]

    audio_features = sp.audio_features(song_id)
    # NOTE(review): the API may return no entry / a None entry for a track
    # without audio analysis -- guard so the DataFrame construction cannot fail.
    if not audio_features or audio_features[0] is None:
        print("Sorry, Spotify has no audio features for that song.")
        return None

    # Select the model features by name instead of relying on pandas' private
    # _get_numeric_data() plus a drop of "key", "mode" and "time_signature".
    input_song_features_num = pd.DataFrame(audio_features)[feature_columns]

    model = load("scaler.pickle")
    model_kmeans = load("kmeans_9.pickle")
    if model is None or model_kmeans is None:
        # load() has already printed "File not found!".
        return None

    input_song_scaled = pd.DataFrame(model.transform(input_song_features_num),
                                     columns = feature_columns)

    # predict() returns a 1-element array; take the element explicitly because
    # int() on an array with ndim > 0 is deprecated in recent NumPy versions.
    input_song_cluster = model_kmeans.predict(input_song_scaled)[0]

    match_song = spotify_songs[spotify_songs["cluster"] == input_song_cluster]
    if match_song.empty:
        print("Sorry, I have no songs in the same cluster to recommend.")
        return None

    suggestion = match_song.sample()["id"].iloc[0]

    print()
    # display() renders the player; print() only showed the object's repr.
    display(_spotify_embed(song_id))
    print()
    print("Here is another song you can listen! Check that out!")

    return _spotify_embed(suggestion)

In [12]:
# Data used by song_recommender(): the hot-songs list and the Spotify songs
# with their cluster labels. The leftover "Unnamed: 0" index column is dropped.

hot_songs = pd.read_csv('hot_songs.csv')
spotify_songs = pd.read_csv('spotify_songs.csv').drop(columns = ["Unnamed: 0"])

In [26]:
# Calling the function: it prompts for a song title and, when the song is not
# in the hot list, returns an embedded player for the suggested song.

song_recommender()

Please enter a name of a song: we will rock you

<IPython.lib.display.IFrame object at 0x16c1a33a0>

Here is another song you can listen! Check that out!


### Here is all the code I used for building the function.

In [70]:
# User input and checking if the song is in the hot list.
# .title() capitalises every word of the input (presumably hot_songs stores
# titles in title case -- TODO confirm). Note that str.title() also capitalises
# the letter after an apostrophe ("don't" -> "Don'T").

song_input = input("Please enter a name of a song: ").title()

# `in hot_songs.values` is an exact match against every cell of the dataframe,
# not only a title column.
if song_input in hot_songs.values:
    print()
    print("Your song is in the hot songs list!")
    print("Here is another song you could listen: ")
    print()
    # One random row of the hot list.
    print(hot_songs.sample())


Please enter a name of a song: back in black


#### Getting the features of the input song

In [71]:
# Search Spotify for the entered title; limit = 1 asks for a single result, market is GB.
input_song = sp.search(q = song_input, limit = 1, market = "GB")

In [72]:
input_song

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=Back+In+Black&type=track&market=GB&offset=0&limit=1',
  'items': [{'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/711MCceyCBcFnzjGY4Q7Un'},
       'href': 'https://api.spotify.com/v1/artists/711MCceyCBcFnzjGY4Q7Un',
       'id': '711MCceyCBcFnzjGY4Q7Un',
       'name': 'AC/DC',
       'type': 'artist',
       'uri': 'spotify:artist:711MCceyCBcFnzjGY4Q7Un'}],
     'external_urls': {'spotify': 'https://open.spotify.com/album/6mUdeDZCsExyJLMdAfDuwh'},
     'href': 'https://api.spotify.com/v1/albums/6mUdeDZCsExyJLMdAfDuwh',
     'id': '6mUdeDZCsExyJLMdAfDuwh',
     'images': [{'height': 640,
       'url': 'https://i.scdn.co/image/ab67616d0000b2730b51f8d91f3a21e8426361ae',
       'width': 640},
      {'height': 300,
       'url': 'https://i.scdn.co/image/ab67616d00001e020b51f8d91f3a21e8426361ae',
       'width': 300},
      {'height': 64,
       'url': 'https://i.sc

In [73]:
# Spotify id of the first track in the search result.
# Raises IndexError if the search returned no items.

song_id = input_song["tracks"]["items"][0]["id"]

In [74]:
# Audio features of the input song, looked up by the track id stored in `song_id`.

input_song_features = sp.audio_features(song_id)

In [75]:
# Putting the audio features into a dataframe and keeping only the numeric columns.
# select_dtypes is the public pandas API for this; it replaces the private
# _get_numeric_data() helper, which may change between pandas versions.

input_song_features = pd.DataFrame(input_song_features)
input_song_features_num = input_song_features.select_dtypes(include = "number")


In [76]:
# Dropping the numeric columns that are not used as model features
# (presumably the scaler was fitted without them -- TODO confirm).

input_song_features_num = input_song_features_num.drop(columns = ["key", "mode", "time_signature"])

In [77]:
# Checking the result.

input_song_features_num

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,0.31,0.7,-5.678,0.047,0.011,0.00965,0.0828,0.763,188.386,255493


In [78]:
# Loading the previously saved scaler. Despite the name `model`, this object is
# only used to transform (scale) the features; clustering uses `model_kmeans`.

model = load("scaler.pickle")

In [81]:
# Loading the previously saved KMeans model (file name suggests 9 clusters -- TODO confirm).
model_kmeans = load("kmeans_9.pickle")

In [79]:
# Scaling the audio features of the input song with the same scaler transform that was used for my Spotify songs.

input_song_scaled = model.transform(input_song_features_num)

In [80]:
# Wrapping the scaled values in a dataframe so the columns carry the feature names.

input_song_scaled = pd.DataFrame(
    input_song_scaled,
    columns = [
        "danceability",
        "energy",
        "loudness",
        "speechiness",
        "acousticness",
        "instrumentalness",
        "liveness",
        "valence",
        "tempo",
        "duration_ms",
    ],
)

In [82]:
# Predicting which cluster the input song belongs to.
# predict() returns an array with one label per input row (here a single row).

input_song_cluster = model_kmeans.predict(input_song_scaled)

In [83]:
input_song_cluster

array([7], dtype=int32)

#### Recommending a new song

In [22]:
# Keep only the songs of my song dataframe that are in the same cluster as the input song.
# The prediction is a 1-element array, so take its first element explicitly:
# int() on an array with ndim > 0 is deprecated in recent NumPy versions.

match_song = spotify_songs[spotify_songs["cluster"] == input_song_cluster[0]]

In [23]:
# Pick one random song from the matching cluster (no random_state is set, so the pick changes between runs).
song_suggestion = match_song.sample()

In [24]:
# Getting the Spotify id of the suggested song (not the input song).

suggestion = song_suggestion["id"].iloc[0]

In [25]:
suggestion

'5eaVYUT0pGYVGoKIaCcrfx'