In [1]:
import pandas as pd
import pickle 
import random

## Importing CSV files

In [2]:
clusters = pd.read_csv('cluster_df.csv')
clusters = clusters.drop('Unnamed: 0', axis= 1)
clusters

Unnamed: 0,title,artist,no_cluster
0,You're The One - Un mal pour un bien,Petula Clark,6
1,Doctor My Eyes,Jackson Browne,1
2,Falling in Love at a Coffee Shop,Landon Pigg,4
3,Peaceful Easy Feeling - 2013 Remaster,Eagles,1
4,Summertime Blues,Eddie Cochran,5
...,...,...,...
5465,"Tighter, Tighter",Alive 'N Kickin',5
5466,Lunatic Fringe,Red Rider,7
5467,Run Runaway,Slade,5
5468,Hey Little Girl,Dee Clark,1


In [3]:
songs = pd.read_csv('100songs.csv')
songs = songs.drop('Unnamed: 0', axis= 1)
songs

Unnamed: 0,title,artist
0,Fast Car,Luke Combs
1,Last Night,Morgan Wallen
2,Need A Favor,Jelly Roll
3,Take Two,BTS
4,Save Me (with Lainey Wilson),Jelly Roll
...,...,...
95,Perfect,Ed Sheeran
96,Always Remember Us This Way,Lady Gaga
97,Whiskey Glasses,Morgan Wallen
98,She Had Me At Heads Carolina,Cole Swindell


## Read pickle: kmeans and scaler

In [5]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
scaler = pickle.load(open('scaler.pkl', 'rb'))
kmeans = pickle.load(open('kmeans.pkl', 'rb'))

In [6]:
kmeans

In [7]:
scaler

## Connection to Spotify

In [8]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

secrets_file = open("secrets.txt", "r")

string = secrets_file.read()

secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0:
        secrets_dict[line.split(':')[0]]=line.split(':')[1].strip()

## Authentication 

In [11]:
input_song = input("Enter a song name: ")
included_song = clusters['title'].isin([input_song]).any()
client_id=secrets_dict['clientid']
client_secret=secrets_dict['clientsecret']
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['clientid'],
                                                           client_secret=secrets_dict['clientsecret'])) 

input_song = sp.search(q=input_song, type='track', limit=1)
uri = input_song['tracks']['items'][0]['uri']
audio_features = sp.audio_features(uri)

Enter a song name: Run Runaway


In [15]:
audio_features_df = pd.DataFrame(audio_features, index=[0])
audio_features_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.576,0.86,2,-7.22,1,0.0592,8.2e-05,0.000269,0.0517,0.757,128.543,audio_features,5e0ozDjIRDuitUuekiF6ns,spotify:track:5e0ozDjIRDuitUuekiF6ns,https://api.spotify.com/v1/tracks/5e0ozDjIRDui...,https://api.spotify.com/v1/audio-analysis/5e0o...,300800,4


In [33]:
selected_columns = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']
audio_song = audio_features_df[selected_columns]
audio_song

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,0.576,0.86,-7.22,0.0592,8.2e-05,0.000269,0.0517,0.757,128.543,300800,4


In [34]:
audio_song = scaler.transform(audio_song)
audio_song

array([[ 0.1804002 ,  1.2122233 ,  0.75732329,  0.10902251, -1.10971126,
        -0.29669769, -0.82997193,  0.61645083,  0.25161521,  0.90325808,
         0.20158559]])

In [35]:
cluster_number = kmeans.predict(audio_song)

In [36]:
list(cluster_number)[0]

0

In [37]:
recommend_cluster = clusters[clusters['no_cluster']== list(cluster_number)[0]]

In [38]:
recommend_cluster['title']


10                               Come Go With Me
11                     All I Have to Do Is Dream
15                                 Runaround Sue
16                            A Teenager In Love
18                 Whole Lot of Shakin' Going On
                          ...                   
5410                                Perhaps Love
5411    Wild Montana Skies (with Emmylou Harris)
5412                                  Love Again
5423            Good Morning Girl - GH 2 Version
5447                  We'll Sing in the Sunshine
Name: title, Length: 1018, dtype: object

In [39]:
recommend_cluster.sample()

Unnamed: 0,title,artist,no_cluster
1921,Spam Song,Monty Python,0


In [55]:
def recommend_song():
    input_song = input("Enter a song name: ")
    #is_song_in_list = playlist_final['name'].isin([song]).any()
    client_id=secrets_dict['clientid']
    client_secret=secrets_dict['clientsecret']
    sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['clientid'],
                                                           client_secret=secrets_dict['clientsecret'])) 
    if input_song in list(songs['title']):
        recommended_song = songs.sample(n=1)['title'].values[0]
        print("Your song is in the Top 100 list!")
        print("Here's another recommended song:", recommended_song)
    
    else:
        input_song = sp.search(q=input_song, type='track', limit=1)
        uri = input_song['tracks']['items'][0]['uri']
        audio_features = sp.audio_features(uri)
        audio_features_df = pd.DataFrame(audio_features, index=[0])
        selected_columns = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']
        audio_song = audio_features_df[selected_columns]
        audio_song = scaler.transform(audio_song) 
        cluster_number = kmeans.predict(audio_song)
        recommend_cluster = clusters[clusters['no_cluster']== list(cluster_number)[0]]
        rec = recommend_cluster.sample()
        
        print("Here's another recommended song:", rec[['title','artist']])

In [57]:
recommend_song()

Enter a song name: Wannabe
Here's another recommended song:                         title            artist
789  Ruby Jean and Billie Lee  Seals and Crofts
