In [2]:
import pandas as pd
import numpy as np
import requests
from time import sleep
import random
from random import randint
from sklearn.preprocessing import StandardScaler
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import joblib
from joblib import dump, load

In [3]:
# Open the Spotify credentials file; its contents are read in the following cell.
secrets_file = open("SpotifySecret.txt","r")

In [4]:
# Read the whole credentials file, then release the file handle so it is not
# left open for the lifetime of the kernel.
try:
    string = secrets_file.read()
finally:
    secrets_file.close()

In [5]:
# Parse the "key:value" lines of the credentials text into a dict.
secrets_dict = {}
for line in string.splitlines():
    # Split on the first ':' only, so a value that itself contains ':' stays intact.
    key, sep, value = line.partition(':')
    if not sep:
        # Blank line or line without a ':' separator: nothing to store.
        continue
    # Strip stray spaces / carriage returns that would otherwise corrupt the credentials.
    secrets_dict[key.strip()] = value.strip()

In [6]:
# Spotify API client authenticated with the 'cid' (client id) and 'cs' (client secret)
# entries of secrets_dict.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['cid'],
                                                           client_secret=secrets_dict['cs']))

In [7]:
# Load the repository of 80's greatest hits: one row per song, with its Spotify
# audio features and the cluster label assigned to it.
repository = pd.read_csv('list_with_clusters.csv')

In [8]:
# Preview the first rows to check the columns that were loaded.
repository.head()

Unnamed: 0,artist,song,uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,cluster
0,pink floyd,another brick in the wall (part 2),spotify:track:1xUTI8Ce8IQnyYCGPmrbzS,0.395,0.766,0,-11.041,1,0.0512,0.00583,0.00624,0.774,0.611,105.41,326373,4,3
1,barbra streisand,a woman in love,spotify:track:53No4na49hTw82MxrFh2PB,0.489,0.245,3,-16.876,0,0.0284,0.43,1.5e-05,0.298,0.333,169.747,230227,4,2
2,john lennon,(just like) starting over,spotify:track:5y0YreEOnQiKFAnCrcFIXz,0.701,0.79,9,-6.16,1,0.0725,0.301,6.1e-05,0.179,0.421,99.104,236547,4,1
3,diana ross,upside down,spotify:track:3pbtBomO4Zt5gGiqsYeiBH,0.873,0.855,5,-8.559,0,0.0615,0.179,0.0285,0.0377,0.884,107.868,245600,4,5
4,lipps inc,funkytown,spotify:track:5dIKF6uVJ206hj0pO0gg0I,0.893,0.509,0,-10.136,1,0.0659,0.00154,0.515,0.0783,0.334,122.478,469347,4,0


In [9]:
# (number of rows, number of columns) of the repository
repository.shape

(1756, 17)

In [10]:
# Number of songs assigned to each cluster
repository['cluster'].value_counts()

1    652
5    380
3    326
2    290
0     79
4     29
Name: cluster, dtype: int64

In [13]:
# Sanity check: most songs by one artist (or band) should fall into one or two clusters (similar style).
# This holds for most artists, but not for all of them.
repository[repository['artist']=='lionel richie']

Unnamed: 0,artist,song,uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,cluster
233,lionel richie,truly,spotify:track:7AslBsiVLQAGrmFmi79ZeK,0.366,0.238,1,-13.853,1,0.038,0.646,1e-05,0.0876,0.131,68.377,200507,4,2
307,lionel richie,all night long (all night),spotify:track:7gaQtc0vrSpmRJpSMDiPEk,0.716,0.556,1,-13.917,1,0.0406,0.111,0.0076,0.0422,0.8,108.828,383440,4,1
363,lionel richie,running with the night,spotify:track:7EAWB05mZwqsoj08jPRCCu,0.766,0.6,9,-9.865,0,0.0374,0.0709,2.1e-05,0.39,0.734,120.19,249667,4,5
387,lionel richie,you are,spotify:track:2SQkCUj5rYrFCRKnaTbYYr,0.777,0.456,1,-12.736,1,0.033,0.196,3.2e-05,0.0473,0.727,111.981,300880,4,1
407,lionel richie,hello,spotify:track:0mHyWYXmmCB9iQyK18m3FQ,0.487,0.246,9,-14.104,0,0.0376,0.414,0.0,0.0983,0.0562,61.084,251107,4,2
462,lionel richie,stuck on you,spotify:track:0ROwoz82DIW4tOzkxYnSjf,0.522,0.346,5,-15.462,1,0.0297,0.258,0.0,0.114,0.27,131.692,195133,4,2
498,lionel richie,"say you, say me",spotify:track:17CPezzLWzvGfpZW6X8XT0,0.636,0.405,8,-11.113,1,0.0287,0.355,0.0,0.0804,0.257,128.068,241067,4,2
620,lionel richie,dancing on the ceiling,spotify:track:0snPJPxkk0MbTc0xeUvAPt,0.698,0.774,0,-8.642,1,0.048,0.12,2e-06,0.394,0.731,133.197,270720,4,1
1743,lionel richie,"endless love - from ""the endless love"" soundtrack",spotify:track:0rt0dZ11FVsvprbarag5tk,0.461,0.275,10,-12.331,1,0.0259,0.745,0.000219,0.126,0.124,93.126,268293,4,2


In [14]:
# Load the saved scaler; it is applied to a new song's audio features before cluster prediction.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code -- only load trusted files.
scaler=joblib.load('scaler.bin')

In [15]:
# Load the saved k-means model; it predicts the cluster of a song that is not in the repository.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code -- only load trusted files.
kmeans=joblib.load('kmeans.bin')

In [16]:
# function for user entry
def user_input ():
    """Prompt for an artist and a song title until both are non-blank.

    Each answer is stripped of surrounding whitespace and lower-cased, so that
    whitespace-only answers are asked again and stray spaces do not defeat the
    exact-match lookup against the repository.

    Returns:
        list: ``[artist, song]`` as normalized strings.
    """
    print("80's SONG RECOMMENDER")
    user_artist = ''
    while not user_artist:
        user_artist = input('Enter the name of the artist: ').strip().lower()
    user_song = ''
    while not user_song:
        user_song = input("Enter the song's title: ").strip().lower()
    return [user_artist, user_song]

In [17]:
# function to check if user entry in repository of songs
def check_repo (repo, song):
    """Look up an ``[artist, song]`` pair in the repository by exact match.

    Args:
        repo: DataFrame with at least 'artist', 'song' and 'cluster' columns.
        song: sequence whose first item is the artist and second the song title.

    Returns:
        tuple: ``(True, cluster_column)`` when at least one row matches, where
        ``cluster_column`` is the 'cluster' column of the matching rows (a
        Series, not a scalar); ``(False, 'NA')`` otherwise.
    """
    artist, title = song[0], song[1]
    same_artist = repo['artist'] == artist
    same_title = repo['song'] == title
    matches = repo[same_artist & same_title]
    if len(matches) == 0:
        return False, 'NA'
    return True, matches['cluster']

In [18]:
# function to search for features of new song
def pick_audio_feat (song):
    """Search Spotify for a song and fetch the audio features of the top hit.

    Uses the module-level ``sp`` client. The query is the artist and the title
    joined by a single space, limited to one track result.

    Args:
        song: sequence whose first item is the artist and second the song title.

    Returns:
        tuple: ``(uri, features)`` where ``features`` is whatever
        ``sp.audio_features`` returns for that uri, or ``([], [])`` when the
        search yields no track.
    """
    query = song[0] + ' ' + song[1]
    hits = sp.search(q=query, type='track', limit=1)['tracks']['items']
    if len(hits) == 0:
        return [], []
    track_uri = hits[0]['uri']
    return track_uri, sp.audio_features(track_uri)

In [19]:
# function recommender of songs of 80's. It's possible to try songs for other decades, and find a 
# recommended song of the 80's
def _print_recommendation (repo, cluster, exclude=None):
    """Print one randomly chosen song of ``repo`` that belongs to ``cluster``.

    Args:
        repo: DataFrame with 'artist', 'song' and 'cluster' columns.
        cluster: scalar cluster label to draw from.
        exclude: optional ``[artist, song]`` pair; that song is left out of the
            draw unless it is the only song in the cluster.
    """
    candidates = repo[repo['cluster'] == cluster]
    if exclude is not None:
        is_excluded = (candidates['artist'] == exclude[0]) & (candidates['song'] == exclude[1])
        # Only drop the input song when something else remains to recommend.
        if not is_excluded.all():
            candidates = candidates[~is_excluded]
    if len(candidates) == 0:
        print('No song available to recommend for this cluster.')
        return
    pick = candidates.iloc[random.randrange(len(candidates))]
    print('RECOMMENDED SONG')
    print('Artist:', pick['artist'])
    print('Song title:', pick['song'])


def recommender (repo, song):
    """Print a recommendation from the repository cluster that ``song`` belongs to.

    If the song is in the repository, its stored cluster is used. Otherwise its
    audio features are fetched with ``pick_audio_feat``, scaled with the
    module-level ``scaler`` and assigned a cluster by the module-level
    ``kmeans`` model. A random song of that cluster is then printed.

    Args:
        repo: DataFrame with 'artist', 'song' and 'cluster' columns.
        song: ``[artist, song]`` pair, as returned by ``user_input``.

    Returns:
        None. All results are printed.
    """
    in_repo, repo_cluster = check_repo(repo, song)
    if in_repo:
        print("This song is a hit of 80's!")
        # check_repo hands back the 'cluster' column of the matching rows;
        # reduce it to a single label. (The previous code filtered on the
        # boolean flag instead, which always selected cluster 1.)
        if isinstance(repo_cluster, pd.Series):
            repo_cluster = repo_cluster.iloc[0]
        _print_recommendation(repo, repo_cluster, exclude=song)
        return

    print("This song is not a hit of 80's!")
    features = pick_audio_feat(song)[1]
    # Guard against an empty result and against a missing (None) feature entry.
    track_feat = features[0] if features else None
    if not track_feat:
        print('Artist and song not found. Please, try again')
        return
    # Keep only the numeric audio features by dropping the metadata fields.
    df_audio_feat = pd.DataFrame([track_feat]).drop(
        ['type','id','uri','track_href','analysis_url'], axis=1)
    audio_feat_scaled = scaler.transform(df_audio_feat)
    cluster = kmeans.predict(audio_feat_scaled)
    _print_recommendation(repo, cluster[0])

In [25]:
# Ask the user for the artist and song to base the recommendation on.
user_song=user_input()

80's SONG RECOMMENDER
Enter the name of the artist: queen
Enter the song's title: another one bites the dust


In [26]:
# Print a recommendation from the repository for the user's entry.
recommender(repository, user_song)

This song is a hit of 80's!
RECOMMENDED SONG
Artist: david bowie
Song title: fashion
