<img src='images/gnod.png' width='75' align='left'/> <h1> Song recommender project </h1>
_____
                                                                                Nelson Lage
                                                                                       Gnod

In [5]:
import config
import pickle
import pandas as pd
from operator import itemgetter
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from IPython.display import IFrame, Image

%config Completer.use_jedi = False

## The data

* Hot songs - 225 unique songs obtained by web scraping the US, UK and Germany official charts
* Recommendation songs - 34358 unique songs from different genres and countries retrieved through the Spotify API

* 13 features:
    * **danceability:** describes how suitable a track is for dancing based on a combination of musical elements including tempo, rhythm stability, beat strength, and overall regularity. Ranges from 0.0 to 1.0.
    * **energy:** Energy is a measure from 0.0 to 1.0 and represents a perceptual measure of intensity and activity. Typically, energetic tracks feel fast, loud, and noisy.
    * **key:** Integers map to pitches using standard Pitch Class notation. E.g. 0 = C, 1 = C♯/D♭, 2 = D. Ranges from -1 (no key detected) to 11.
    * **loudness:** overall loudness of a track in decibels (dB), averaged across the entire track. Values typically range between -60 and 0 dB.
    * **mode:**  indicates the modality (major - 1 or minor - 0) of a track, the type of scale from which its melodic content is derived.

* 13 features (cont'd):
    * **speechiness:** detects the presence of spoken words in a track. The more exclusively speech-like the recording, the closer to 1.0 the attribute value.
    * **acousticness:** confidence measure from 0.0 to 1.0 of whether the track is acoustic.
    * **instrumentalness:** predicts whether a track contains no vocals (0.0 to 1.0).
    * **liveness:** detects the presence of an audience in the recording (0.0 to 1.0).
    * **valence:** measure from 0.0 to 1.0 describing the musical positiveness conveyed by a track (the closer to 1.0, the happier). 
    * **tempo:** overall estimated tempo of a track in beats per minute (BPM)
    * **duration_ms:** duration of the track in milliseconds.
    * **time_signature:** a notational convention to specify how many beats are in each bar (or measure). It ranges from 3 to 7.

## Choosing k (K-Means)

### Elbow method
<img src='images/elbow.png' width='800' align='left'/>

### Silhouette method
<img src='images/silhouette.png' width='800' align='left'/>

In [9]:
# Fitted artefacts saved earlier in the project: the feature scaler and the
# clustering model (presumably the 10-cluster K-Means, going by the file name).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load artefacts that were produced by this project.
with open('models/scaler.pickle', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

with open('models/kmeans_10.pickle', 'rb') as model_file:
    model = pickle.load(model_file)

# Tabular data: the scraped hot-song list and the recommendation catalogue
# (the catalogue file carries a `cluster` column used by the recommender).
hot_songs = pd.read_pickle('datasets/hot_songs.pickle')
songs_scaled = pd.read_pickle('datasets/songs_clustered.pickle')

In [10]:
# Preview the recommendation catalogue: track id, title, artists, the audio
# features (values look standardised) and the cluster assigned to each song.
songs_scaled

Unnamed: 0,id,title,artists,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,cluster
0,33xMbeHzmWd6Od0BmLZEUs,2k,nosaj thing,-1.599557,-0.857329,0.487352,-1.256756,-1.388881,0.134295,-0.552654,-0.176625,-0.492765,-1.535615,-0.922229,-0.455240,-2.649790,9
1,3UnyplmZaq547hwsfOR5yy,4 billion souls,the doors,-0.939474,-0.335347,-0.071322,-0.829766,0.720004,-0.502147,-0.412321,0.883945,-0.399318,0.560618,1.072723,-0.244592,0.191693,8
2,1w8QCSDH4QobcQeT4uMKLm,4 minute warning,radiohead,-1.333102,-1.479359,1.046025,-1.190680,0.720004,-0.528049,1.066067,-0.164412,-0.505225,-1.133107,0.090749,-0.027268,0.191693,5
3,7J9mBHG4J2eIfDAv5BehKA,7 element,vitas,0.925714,0.621622,-0.071322,0.329069,-1.388881,-0.186393,0.201226,0.052666,0.734507,1.804011,0.301100,-0.000883,0.191693,3
4,1VZedwJj1gyi88WFRhfThb,#9 dream,r.e.m.,-0.018992,0.356281,-1.468006,0.505590,0.720004,-0.609455,-0.784039,-0.431485,-0.624214,-0.487499,-0.158919,0.131533,0.191693,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34355,6pWgRkpqVfxnj3WuIcJ7WP,cornfield chase,hans zimmer,-2.386812,-1.809948,1.046025,-1.996712,-1.388881,-0.412108,2.244209,3.425056,-0.574999,-1.830523,-0.967932,-0.574685,0.191693,2
34356,6VfNTf0N1HwfFKl7Y18diU,omen,the prodigy,-0.176443,1.352398,0.487352,0.695230,0.720004,-0.386206,-0.856356,0.017209,0.553843,0.201947,0.670465,-0.159115,0.191693,8
34357,28d1X9lfagOD4iFULH4qEK,dark star - homemade weapons remix,"quadrant, iris, homemade weapons",-1.436051,1.456794,1.046025,1.453077,0.720004,0.273671,-0.843371,2.991688,0.423017,-1.789873,1.735959,0.445944,0.191693,4
34358,5HiSc2ZCGn8L3cH3qSwzBT,러시안 룰렛 russian roulette,red velvet,0.919658,1.134905,0.487352,1.385809,-1.388881,-0.489813,-0.679932,-0.442851,-0.212424,1.796041,0.313123,-0.181431,0.191693,3


In [11]:
# Preview the hot-song list (one row per song with its artist credit).
hot_songs

Unnamed: 0,song,artist
0,we don't talk about bruno,"carolina gaitan, mauro castillo, adassa, rhenz..."
1,do we have a problem?,nicki minaj x lil baby
2,easy on me,adele
3,heat waves,glass animals
4,stay,the kid laroi & justin bieber
...,...,...
222,love again,dua lipa
223,habit,laurell
224,head & heart,joel corry feat. mnek
225,la familia 2,sun diego & spongebozz


In [12]:
# Audio features picked out of the Spotify response and handed to the scaler
# and the clustering model; the list order is the column order that is used.
feat_names = [
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'duration_ms',
    'time_signature',
]

In [13]:
# Build the Spotify API client. Credentials are read from the local `config`
# module so that they never appear in the notebook itself.
credentials_manager = SpotifyClientCredentials(
    client_id=config.client_id,
    client_secret=config.client_secret,
)
sp = spotipy.Spotify(auth_manager=credentials_manager)

In [15]:
def spotify_player(track_id):
    """Build an embeddable Spotify player for a single track.

    Parameters
    ----------
    track_id
        Spotify track ID; it is interpolated into the embed URL.

    Returns
    -------
    IPython.display.IFrame
        A 320x80 frame pointing at the track's Spotify embed page.
    """
    embed_url = f"https://open.spotify.com/embed/track/{track_id}"

    # Extra options handed to IFrame unchanged, in this order.
    # NOTE(review): IPython's IFrame appears to append extra keyword arguments
    # to the URL as query parameters rather than emit them as HTML attributes
    # -- confirm against the installed IPython version.
    frame_options = {
        "frameborder": "0",
        "allowtransparency": "true",
        "allow": "encrypted-media",
    }

    return IFrame(src=embed_url, width="320", height="80", **frame_options)

In [16]:
def song_suggestion():
    """Interactively recommend a song based on a title typed by the user.

    The function prompts on standard input and reports through ``print`` and
    ``display``; it takes no arguments and returns ``None``.

    Flow
    ----
    1. Ask for a song title. If its lower-cased form is found in
       ``hot_songs['song']``, print a different, randomly chosen hot song
       and stop.
    2. Otherwise search Spotify for the title. If the search fails or finds
       nothing, tell the user and ask for a title again.
    3. List the distinct artist line-ups among the results and ask the user
       to pick one by number (re-prompting until the number is in the list).
    4. Fetch the audio features of the chosen track, scale them with
       ``scaler``, predict its cluster with ``model`` and sample a track of
       the same cluster from ``songs_scaled``.
    5. Display embedded players for the chosen and the recommended track.

    Relies on the notebook-level names ``hot_songs``, ``songs_scaled``,
    ``scaler``, ``model``, ``feat_names``, ``sp`` and ``spotify_player``.
    """
    # Retry with a loop, not recursion: a failed lookup simply asks again and
    # never falls through to code that needs a selected track.
    while True:
        user_song = input('Enter a song: ')
        wanted_title = user_song.lower()

        if wanted_title in hot_songs['song'].values:
            # "Another" hot song: leave out the one just entered, unless it
            # is the only entry in the list.
            other_hot_songs = hot_songs[hot_songs['song'] != wanted_title]
            pool = hot_songs if other_hot_songs.empty else other_hot_songs
            suggestion = pool.sample(1).iloc[0]

            print('Here\'s another hot song for you: ' +
                  '\033[1m' + suggestion['song'].title() + '\033[0m' +
                  ' by ' + suggestion['artist'].title())
            return

        try:
            responses = sp.search(q=user_song, type="track", limit=25, market='US')
            tracks = responses['tracks']['items']
        except (spotipy.SpotifyException, KeyError, TypeError):
            # API error (e.g. an empty query) or an unexpected response shape:
            # treat it the same as "nothing found".
            tracks = []

        if tracks:
            break

        print('It seemns like this song doesn\'t exist. Please, try again!')

    # Map every distinct artist line-up to the first track id found for it.
    artists_song_id = {}
    for track in tracks:
        artists = ', '.join(artist['name'] for artist in track['artists'])
        artists_song_id.setdefault(artists, track['id'])
    artists_list = list(artists_song_id)

    print('\nUnfortunately the song is not in the hot list!\n')
    print('Maybe we can recommend you something else based on your input.')
    print('First, we need to know the artist/s from your song:\n')
    print(*enumerate(artists_list), sep='\n')

    artist_number = input('\nPlease, choose a number from the list: ')
    while True:
        try:
            choice = int(artist_number)
        except ValueError:
            choice = -1
        # Only numbers actually shown in the list are valid; negative values
        # would otherwise silently index from the end.
        if 0 <= choice < len(artists_list):
            break
        artist_number = input('Please, enter a valid number: ')

    id_ = artists_song_id[artists_list[choice]]

    # The audio-features endpoint yields None for tracks it has no analysis for.
    all_feats = sp.audio_features(id_)[0]
    if all_feats is None:
        print('Sorry, no audio features are available for this track.')
        return

    features_df = pd.DataFrame([{name: all_feats[name] for name in feat_names}])
    feat_scaled = pd.DataFrame(scaler.transform(features_df), columns=feat_names)
    cluster_recommended = model.predict(feat_scaled)[0]

    # Sample from the same cluster, avoiding the input track itself whenever
    # the cluster offers an alternative.
    candidates = songs_scaled.loc[songs_scaled['cluster'] == cluster_recommended, 'id']
    other_candidates = candidates[candidates != id_]
    if not other_candidates.empty:
        candidates = other_candidates
    recommended_id = candidates.sample(1).values[0]

    display(spotify_player(id_))
    display(spotify_player(recommended_id))

## Our recommender

In [17]:
# Run the interactive recommender; it prompts for a song title (and, when the
# song is not in the hot list, for an artist number) in the cell output.
song_suggestion()

Enter a song: yellow

Unfortunately the song is not in the hot list!

Maybe we can recommend you something else based on your input.
First, we need to know the artist/s from your song:

(0, 'Coldplay')
(1, 'Ant Saunders')
(2, 'Cardi B')
(3, 'The Beatles')
(4, 'Pearl Jam')
(5, 'Key Glock')
(6, 'Rayland Baxter')
(7, 'Wiz Khalifa')
(8, 'Lofi Fruits Music, Orange Stick, Avocuddle')
(9, 'Yellowcard')
(10, 'Elton John')
(11, 'Harry Hudson')
(12, 'Katherine Ho')
(13, 'Yoh kamiyama')
(14, 'Emmit Fenn')
(15, 'Aminé, Nelly')
(16, 'Pity Party (Girls Club)')
(17, 'Lorde')
(18, 'City Morgue, ZillaKami')
(19, 'Spiritbox, Sam Carter')

Please, choose a number from the list: 0
