In [68]:
import config
import spotipy
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import StandardScaler
from pyjarowinkler import distance
from IPython.display import IFrame
from spotipy.oauth2 import SpotifyClientCredentials

# Create the Spotipy client, authenticating with the client id / client secret
# pair read from the local config module
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=config.c_id,
        client_secret=config.c_se,
    )
)

In [69]:
# Load the two CSV tables used by song_recommender() (paths are relative to the working directory):
#   top_chart_df - the top 100 chart; its `title` and `artist` columns are read by song_recommender()
#   upsampled_df - the song database; its `cluster` and `track_id` columns are read by song_recommender()
top_chart_df = pd.read_csv('top_chart_df.csv')
upsampled_df = pd.read_csv('upsampled_df.csv')

In [70]:
# helper for reading back an object that was saved with pickle
def load(filename = "filename.pickle"):
    """Unpickle and return the object stored in ``filename``.

    Parameters
    ----------
    filename : str
        Path of the pickle file to read.

    Returns
    -------
    object or None
        The unpickled object, or ``None`` (after printing "File not found!")
        when the file does not exist.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code, so only use this on files
    that come from a trusted source.
    """
    loaded_object = None
    try:
        with open(filename, "rb") as pickle_file:
            loaded_object = pickle.load(pickle_file)
    except FileNotFoundError:
        # best-effort: report the missing file and fall through to return None
        print("File not found!")
    return loaded_object

In [71]:
# Load the pickled scaler; song_recommender() calls scaler.transform() on the audio features.
# NOTE: load() only prints "File not found!" and returns None when the file is missing,
# so `scaler` is None in that case.
scaler = load("Model/scaler.pickle")

In [72]:
# Load the pickled K-means model; song_recommender() calls kmeans.predict() on the scaled audio features.
# NOTE: load() only prints "File not found!" and returns None when the file is missing,
# so `kmeans` is None in that case.
kmeans = load("Model/kmeans.pickle")

In [73]:
# helpers and main function for recommending similar songs
def _normalize_text(text):
    """Return ``text`` lowercased with every space removed, for loose title/artist matching."""
    return text.replace(" ", "").lower()


def _spotify_player(track_id):
    """Return the embedded Spotify player (IFrame) for the given Spotify track id."""
    return IFrame(src="https://open.spotify.com/embed/track/"+track_id, width="420", height="80", frameborder="0", allowtransparency="true", allow="encrypted-media",)


def song_recommender(similarity_threshold=0.9):
    """Interactively recommend a song based on a title typed by the user.

    The user is prompted for a song title, which is compared (lowercased,
    spaces removed) against every title in ``top_chart_df`` using the
    Jaro-Winkler similarity.

    * If at least one chart title scores above ``similarity_threshold``, the
      input is treated as the most similar chart title and another track from
      the chart is suggested. When that title appears more than once in the
      chart, the user is additionally asked for the artist.
    * Otherwise the input is searched on Spotify, the user picks one of the
      results, its audio features are scaled with ``scaler`` and assigned to a
      cluster with ``kmeans``, and a random track of the same cluster is taken
      from ``upsampled_df``.

    Parameters
    ----------
    similarity_threshold : float, default 0.9
        A chart title counts as a match when its similarity with the user
        input is strictly larger than this value.

    Returns
    -------
    IPython.display.IFrame, str or None
        An embedded Spotify player for the recommended track; a
        ``"title - artist"`` string (duplicate-title branch, or when the
        suggested chart track cannot be found on Spotify); the string
        ``"Please input a valid artist"``; or ``None`` after printing
        "Song not found!" when the Spotify lookup fails.
    """
    user_input = input("Please input a song: ")   # asking user for an input
    print('\n')

    # generic (lowercase, no spaces) versions of chart titles, chart artists and the user input
    chart_titles = list(top_chart_df.title)
    song_list_cleaned = [_normalize_text(title) for title in chart_titles]
    artist_list_cleaned = [_normalize_text(artist) for artist in list(top_chart_df.artist)]
    user_input_cleaned = _normalize_text(user_input)

    # titles that occur more than once in the chart (same title, several rows)
    duplicate_check = top_chart_df[top_chart_df.title.duplicated(keep=False)]
    duplicate_list = [_normalize_text(title) for title in list(duplicate_check.title)]

    # text similarity between the cleaned user input and each cleaned chart title
    similarity_score_list = [
        distance.get_jaro_distance(user_input_cleaned, cleaned_title, winkler=True, scaling=0.1)
        for cleaned_title in song_list_cleaned
    ]
    similarity_count = sum(score > similarity_threshold for score in similarity_score_list)

    # correct the user input to the most similar chart title (first one on ties)
    best_position = max(range(len(similarity_score_list)), key=similarity_score_list.__getitem__)
    user_input_corrected = chart_titles[best_position]
    user_input_corrected_and_cleaned = _normalize_text(user_input_corrected)

    if similarity_count > 0:   # there is a song match from the top chart
        if user_input_corrected_and_cleaned in duplicate_list:   # several chart rows share this title
            print(top_chart_df[top_chart_df.title == user_input_corrected])   # show the candidates and their artists
            artist_user_input = input("Please input the artist: ")
            artist_user_input_cleaned = _normalize_text(artist_user_input)

            if artist_user_input_cleaned not in artist_list_cleaned:
                return "Please input a valid artist"

            # Exclude the user's own track. The artist is compared in its cleaned form,
            # the same form used for the validation above, so that differences in
            # case/spacing do not let the user's own song slip back into the candidates.
            artist_column_cleaned = pd.Series(artist_list_cleaned, index=top_chart_df.index)
            is_other_track = (top_chart_df["title"] != user_input_corrected) | (artist_column_cleaned != artist_user_input_cleaned)
            suggestion2 = top_chart_df[is_other_track].sample(1)
            # first column is taken as the title and second as the artist (positional, as before)
            return (suggestion2.iloc[0, 0] + " - " + suggestion2.iloc[0, 1])

        # unique title: suggest any other track from the chart
        suggestion = top_chart_df[top_chart_df["title"] != user_input_corrected].sample(1)
        suggestion_title = suggestion.iloc[0, 0]   # first column is taken as the title (positional, as before)
        suggestion_artist = suggestion.iloc[0, 1]   # second column is taken as the artist (positional, as before)
        # the Spotify lookup is only done here, where its result is actually used
        found_tracks = sp.search(q=suggestion_title+" "+suggestion_artist ,limit=1,market="GB")['tracks']['items']

        print("Your song is in the top 100 chart! Here is another one from the top chart for you.")
        if not found_tracks:
            # no Spotify hit for the suggestion: fall back to plain text instead of raising IndexError
            return (suggestion_title + " - " + suggestion_artist)
        return _spotify_player(found_tracks[0]['id'])

    # there is no song match from the top chart
    print('The song you input is not in the top 100 chart.')
    try:
        result = sp.search(q=user_input ,limit=5,market="GB")   # up to 5 Spotify search results for the raw input
        found_tracks = result['tracks']['items']
        for position, track in enumerate(found_tracks, start=1):
            print(str(position)+". "+(track['artists'][0]['name'])+" ("+track['name']+")")

        number_input = int(input("Which song and artists do you mean? Please input the number: "))
        if not 1 <= number_input <= len(found_tracks):
            # 0 or a negative number would otherwise silently pick a track from the end of the list
            raise IndexError("selection out of range")

        new_input_uri = found_tracks[number_input-1]['uri']
        my_dict = sp.audio_features(new_input_uri)[0]   # audio features of the chosen track
        if my_dict is None:
            raise LookupError("no audio features available")

        # one-row dataframe of the audio features, without the columns that are not model inputs
        audio_feature_df = pd.DataFrame({key: [value] for key, value in my_dict.items()})
        audio_feature_df = audio_feature_df.drop(columns=['type','uri','track_href','analysis_url','duration_ms','time_signature','id'])

        audio_feature_tf_df = scaler.transform(audio_feature_df)   # scaler from the model (refer to Jupyter Notebook 2_Clustering)
        user_input_cluster = kmeans.predict(audio_feature_tf_df)[0]   # K-means model (refer to Jupyter Notebook 2_Clustering)

        # random track from the song database that belongs to the predicted cluster
        track_id = upsampled_df[upsampled_df['cluster']==user_input_cluster].sample(1)['track_id'].values[0]
        print('\n')
        print('Here is another similar song for you.')
        return _spotify_player(track_id)
    except Exception:
        # best-effort: any lookup/selection failure is reported to the user, while
        # KeyboardInterrupt / SystemExit are no longer swallowed
        print('\n')
        print("Song not found!")

In [147]:
# Run the interactive recommender; as the last expression of the cell, a returned
# IFrame (or string) is displayed as the cell output.
song_recommender()

Please input a song: you're not alone


The song you input is not in the top 100 chart.
1. Olive (You're Not Alone)
2. Eminem (Not Afraid)
3. Amon Düül II (You're Not Alone)
4. The Enemy (You're Not Alone)
5. Andrew Garfield (30/90 (from "tick, tick... BOOM!" Soundtrack from the Netflix Film))
Which song and artists do you mean? Please input the number: 1


Here is another similar song for you.


