In [1]:
import numpy as np
import pandas as pd
import pickle
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from matplotlib import pyplot
from sklearn.metrics import silhouette_score
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import config
from IPython.display import IFrame

In [2]:
# Spotify Web API client authenticated with the client-credentials flow;
# the client id and secret are read from the local `config` module.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=config.client_id,
        client_secret=config.client_secret,
    )
)

In [3]:
def load(filename = "filename.pickle"):
    """Unpickle and return the object stored in ``filename``.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        The unpickled object, or ``None`` (after printing "File not found!")
        when a ``FileNotFoundError`` is raised.

    Note:
        ``pickle.load`` can execute arbitrary code, so only load files that
        come from a trusted source.
    """
    try:
        with open(filename, "rb") as pickle_file:
            loaded_object = pickle.load(pickle_file)
    except FileNotFoundError:
        print("File not found!")
        return None
    return loaded_object

### User Input

In [4]:
# Read the hot-songs table from hot_songs.csv and display it.
hot_songs = pd.read_csv('hot_songs.csv')
hot_songs

Unnamed: 0,song_title,artist(s)
0,We Don't Talk About Bruno,"Carolina Gaitan, Mauro Castillo, Adassa, Rhenz..."
1,Do We Have A Problem?,Nicki Minaj X Lil Baby
2,Easy On Me,Adele
3,Heat Waves,Glass Animals
4,Stay,The Kid LAROI & Justin Bieber
...,...,...
95,Iffy,Chris Brown
96,When I'm Gone,Alesso / Katy Perry
97,Fair Trade,Drake Featuring Travis Scott
98,Megan's Piano,Megan Thee Stallion


In [5]:
# Ask the user for a song and check whether it is in the hot songs list.

song_input = input("Please enter a name of a song: ").title()

# Compare case-insensitively against the title column only:
# - str.title() capitalises the letter after an apostrophe ("Don't" -> "Don'T"),
#   so an exact comparison never matches such titles;
# - searching every cell of the frame would also match artist names.
title_matches = hot_songs["song_title"].astype(str).str.casefold() == song_input.casefold()

if title_matches.any():
    print()
    print("Your song is in the hot songs list!")
    print("Here is another song you could listen: ")
    print()
    # Suggest a *different* hot song whenever one exists.
    other_hot_songs = hot_songs[~title_matches]
    print((hot_songs if other_hot_songs.empty else other_hot_songs).sample())


Please enter a name of a song: crazy in love


### Getting the features of the input song

In [6]:
# Search Spotify for the entered title, asking for at most one result in the GB market.
input_song = sp.search(
    q=song_input,
    limit=1,
    market="GB",
)

In [7]:
# Display the raw search response (nested dict with the hits under ["tracks"]["items"]).
input_song

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=Crazy+In+Love&type=track&market=GB&offset=0&limit=1',
  'items': [{'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6vWDO969PvNqNYHIOW5v0m'},
       'href': 'https://api.spotify.com/v1/artists/6vWDO969PvNqNYHIOW5v0m',
       'id': '6vWDO969PvNqNYHIOW5v0m',
       'name': 'Beyoncé',
       'type': 'artist',
       'uri': 'spotify:artist:6vWDO969PvNqNYHIOW5v0m'}],
     'external_urls': {'spotify': 'https://open.spotify.com/album/6oxVabMIqCMJRYN1GqR3Vf'},
     'href': 'https://api.spotify.com/v1/albums/6oxVabMIqCMJRYN1GqR3Vf',
     'id': '6oxVabMIqCMJRYN1GqR3Vf',
     'images': [{'height': 640,
       'url': 'https://i.scdn.co/image/ab67616d0000b27345680a4a57c97894490a01c1',
       'width': 640},
      {'height': 300,
       'url': 'https://i.scdn.co/image/ab67616d00001e0245680a4a57c97894490a01c1',
       'width': 300},
      {'height': 64,
       'url': 'https://i.

In [8]:
# Spotify id of the first search hit; raises IndexError if the search returned no items.
song_id = input_song["tracks"]["items"][0]["id"]

In [9]:
# Request the audio features of the first search hit (same id as `song_id`).
input_song_features = sp.audio_features(input_song["tracks"]["items"][0]["id"] )

In [10]:
# Turn the audio-features response into a DataFrame and keep only its numeric columns.
input_song_features = pd.DataFrame(input_song_features)
# select_dtypes() is the public pandas API for this; _get_numeric_data() is a
# private helper that may change without notice.
input_song_features_num = input_song_features.select_dtypes(include="number")


In [11]:
# Discard the numeric columns that are not passed on to the scaler.
input_song_features_num = input_song_features_num.drop(
    columns=["key", "mode", "time_signature"]
)

In [12]:
# Display the remaining feature columns of the input song.
input_song_features_num

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,0.646,0.77,-6.596,0.226,0.00249,0,0.0715,0.681,99.165,236133


In [13]:
# Load the object pickled in scaler.pickle; despite the name `model`, the next
# cell only calls its `transform` method. `load` returns None if the file is missing.
model = load("scaler.pickle")

In [14]:
# Apply the loaded scaler to the input song's feature columns.
input_song_scaled = model.transform(input_song_features_num)

In [15]:
# Re-wrap the scaled values in a DataFrame that carries the feature column names.
input_song_scaled = pd.DataFrame(
    input_song_scaled,
    columns=[
        "danceability",
        "energy",
        "loudness",
        "speechiness",
        "acousticness",
        "instrumentalness",
        "liveness",
        "valence",
        "tempo",
        "duration_ms",
    ],
)

In [16]:
# Load the pickled clustering model from kmeans_9.pickle (None if the file is missing).
model_kmeans = load("kmeans_9.pickle")

In [17]:
# Predict the cluster label of the scaled input song.
input_song_cluster = model_kmeans.predict(input_song_scaled)

In [18]:
# Display the predicted cluster label (an array holding one label).
input_song_cluster

array([2], dtype=int32)

In [19]:
# Display the loaded clustering model to inspect its parameters.
model_kmeans

KMeans(n_clusters=9, random_state=1234)

### Recommending a new song

In [20]:
# Read the song catalogue used for recommendations from spotify_songs.csv.
spotify_songs = pd.read_csv('spotify_songs.csv')

In [21]:
# Remove the leftover "Unnamed: 0" column. Reassigning with errors="ignore"
# (instead of an in-place drop) keeps this cell re-runnable: running it a
# second time no longer raises KeyError for the already-removed column.
spotify_songs = spotify_songs.drop(columns=["Unnamed: 0"], errors="ignore")
spotify_songs

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,energy.1,loudness.1,speechiness.1,acousticness.1,instrumentalness.1,liveness.1,valence.1,tempo.1,duration_ms.1,cluster
0,0.522,0.817,0,-6.170,1,0.0512,0.32900,0.000002,0.0807,0.562,...,1.192791,0.556894,-0.465635,-0.063750,-0.492709,-0.650630,0.226191,1.094516,-0.049642,7
1,0.533,0.776,8,-7.092,1,0.0847,0.15100,0.000087,0.1300,0.697,...,1.013490,0.397130,-0.135124,-0.618475,-0.492437,-0.277705,0.804202,-1.025819,0.065054,2
2,0.692,0.651,9,-8.267,1,0.0324,0.29200,0.002410,0.1050,0.706,...,0.466841,0.193527,-0.651116,-0.179058,-0.484986,-0.466815,0.842736,-0.752740,0.552797,2
3,0.873,0.651,1,-4.179,1,0.0374,0.02510,0.000107,0.2130,0.828,...,0.466841,0.901893,-0.601786,-1.010833,-0.492371,0.350142,1.365088,-0.159519,-0.368614,2
4,0.682,0.605,8,-10.771,1,0.0403,0.18500,0.003260,0.0469,0.914,...,0.265674,-0.240364,-0.573175,-0.512516,-0.482260,-0.906307,1.733302,0.221096,-0.452335,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5083,0.964,0.540,5,-8.462,0,0.0763,0.05190,0.000000,0.1010,0.771,...,-0.018583,0.159738,-0.217999,-0.927313,-0.492715,-0.497073,1.121038,0.084209,-0.406379,2
5084,0.671,0.636,6,-8.117,0,0.1030,0.51500,0.000411,0.1720,0.262,...,0.401243,0.219519,0.045423,0.515906,-0.491397,0.040001,-1.058279,0.047634,-0.732165,5
5085,0.573,0.856,9,-4.807,1,0.3710,0.02040,0.000049,0.1790,0.515,...,1.363346,0.793073,2.689511,-1.025480,-0.492556,0.092952,0.024957,0.685037,0.404927,6
5086,0.514,0.661,0,-11.447,1,0.0415,0.00441,0.001120,0.1160,0.627,...,0.510573,-0.357501,-0.561336,-1.075312,-0.489123,-0.383606,0.504493,0.529325,0.707916,7


In [22]:
# Keep only the catalogue songs whose cluster label equals the cluster predicted
# for the input song. The prediction is an array with one label, so index it
# explicitly: calling int() on a non-0-d array is deprecated in recent NumPy.

match_song = spotify_songs[spotify_songs["cluster"] == int(input_song_cluster[0])]

In [23]:
# Draw one same-cluster song at random (no random_state is passed to sample()).
song_suggestion = match_song.sample()

In [24]:
# Take the "id" value of the sampled row.
suggestion = song_suggestion["id"].iloc[0]

In [25]:
# Display the id of the suggested song.
suggestion

'1mrsNUOIsdCgT5SCGXCOaP'

In [26]:
def song_recommender():
    """Ask for a song title and recommend another song to listen to.

    If the entered title is in the ``hot_songs`` table, a random other hot song
    is printed. Otherwise the title is searched on Spotify, the audio features
    of the first hit are scaled with the pickled scaler, assigned to a cluster
    with the pickled KMeans model, and a random song from the same cluster of
    ``spotify_songs`` is returned as an embedded Spotify player.

    Relies on the notebook-level names ``hot_songs``, ``spotify_songs``, ``sp``,
    ``load`` and ``IFrame``.

    Returns:
        IPython.display.IFrame: player for the suggested track, or ``None``
        when a hot song was printed instead or no suggestion could be made
        (a message explaining why is printed in that case).
    """
    # Feature columns, in the order used for scaling and clustering.
    feature_columns = ["danceability", "energy", "loudness", "speechiness",
                       "acousticness", "instrumentalness", "liveness",
                       "valence", "tempo", "duration_ms"]

    song_input = input("Please enter a name of a song: ").strip().title()
    if not song_input:
        print("No song name entered.")
        return None

    # Compare case-insensitively against the title column only: str.title()
    # turns "Don't" into "Don'T", so an exact comparison misses such titles,
    # and searching every cell of the frame would also match artist names.
    title_matches = hot_songs["song_title"].astype(str).str.casefold() == song_input.casefold()

    if title_matches.any():
        print()
        print("Your song is in the hot songs list!")
        print("Here is another song you could listen: ")
        print()
        # Suggest a *different* hot song whenever one exists.
        other_hot_songs = hot_songs[~title_matches]
        print((hot_songs if other_hot_songs.empty else other_hot_songs).sample())
        return None

    search_result = sp.search(q=song_input, limit=1, market="GB")
    found_tracks = search_result["tracks"]["items"]
    if not found_tracks:
        # Previously this case crashed with IndexError on items[0].
        print("No song found on Spotify for that name.")
        return None
    song_id = found_tracks[0]["id"]

    audio_features = sp.audio_features(song_id)
    if not audio_features or audio_features[0] is None:
        print("No audio features available for that song.")
        return None

    model = load("scaler.pickle")
    model_kmeans = load("kmeans_9.pickle")
    if model is None or model_kmeans is None:
        # load() has already printed that a file is missing.
        return None

    # Select the feature columns explicitly rather than via the private
    # DataFrame._get_numeric_data() helper followed by a drop.
    input_song_features_num = pd.DataFrame(audio_features)[feature_columns]
    input_song_scaled = pd.DataFrame(model.transform(input_song_features_num),
                                     columns=feature_columns)

    # predict() returns an array with one label; index it instead of calling
    # int() on the array, which is deprecated in recent NumPy.
    input_song_cluster = int(model_kmeans.predict(input_song_scaled)[0])

    # Same cluster, but never the song the user just entered.
    match_song = spotify_songs[(spotify_songs["cluster"] == input_song_cluster)
                               & (spotify_songs["id"] != song_id)]
    if match_song.empty:
        print("No similar song available to recommend.")
        return None

    suggestion = match_song.sample()["id"].iloc[0]

    # NOTE(review): the extra keyword arguments appear to be appended to the
    # URL as query parameters by IPython's IFrame rather than set as iframe
    # attributes; confirm against the IPython documentation. Kept unchanged.
    return IFrame(src="https://open.spotify.com/embed/track/" + suggestion,
                  width="320",
                  height="80",
                  frameborder="0",
                  allowtransparency="true",
                  allow="encrypted-media",
                  )

In [27]:
# Run the recommender; the returned player (if any) is rendered as the cell output.
song_recommender()

Please enter a name of a song: crazy in love


In [28]:
# Embed the Spotify player for `song_id`, i.e. the track found for the user's
# search in the earlier cell (not the recommended track).
# NOTE(review): the extra keyword arguments appear to be appended to the URL as
# query parameters by IPython's IFrame rather than set as iframe attributes;
# confirm against the IPython documentation.
IFrame(src="https://open.spotify.com/embed/track/" + song_id,
                   width="320",
                   height="80",
                   frameborder="0",
                   allowtransparency="true",
                   allow="encrypted-media",
                  )