In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial.distance import euclidean

# Loading dataset
songs_df = pd.read_csv("cleaned_data/songs_cleaned.csv")

# Selecting attributes/features to be included in similarity calculation 
features = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 
            'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']

# Normalize features (0-1 range)
scaler = MinMaxScaler()
songs_df_scaled = songs_df.copy()
songs_df_scaled[features] = scaler.fit_transform(songs_df[features])

In [12]:
# Altering function to take a playlist an input 


# Function that takes single input song and outputs similar songs using Euclidean distance
def recommend_songs_from_playlist(playlist, top_n=5):

    if len(playlist) < 1:
        return "Playlist empty"
    
    # Making sure all songs in playlist are in database
    for song in playlist:
        if song not in songs_df_scaled['track_name'].values:
            return f"Song {song} not found in dataset."

    # Getting average feature vector for the playlist
    playlist_vector = songs_df_scaled[songs_df_scaled['track_name'].isin(playlist)][features].mean().values

    # Removing playlist songs from song df (so the system doesn't recommend the input songs)
    other_songs_df_scaled = songs_df_scaled[~songs_df_scaled['track_name'].isin(playlist)].copy()

    # Computing distances to all other songs
    other_songs_df_scaled.loc[:, 'distance'] = other_songs_df_scaled[features].apply(lambda x: euclidean(playlist_vector, x), axis=1)

    # Getting top N closest songs 
    recommendations = other_songs_df_scaled.sort_values(by='distance')[1:top_n+1]

    return recommendations[['track_name', 'track_artist', 'distance']]


In [None]:
# Example(s)

# NOTE: both examples outputted two different versions of a song; when we clean, 
# should we be sorting out duplicate song/artist combos? or is it all right? 

# Other NOTE: should we be inputting more than song names in playlists? 
# Multiple versions of "Here Comes the Sun", for example

playlist1 = ["Higher Love", "Sweet Dreams (Are Made of This)"]
print(recommend_songs_from_playlist(playlist1, top_n=5))

playlist2 = ["Lose Yourself to Dance", "Stole the Show", "Barbie Girl"]
print(recommend_songs_from_playlist(playlist2, top_n=5))


                 track_name    track_artist  distance
3356   Stay a Little Longer     Lostboycrow  0.108815
1307              Paparazzi       Lady Gaga  0.110252
14382                  2 On         Tinashe  0.113623
245              No Diggity  Campsite Dream  0.123341
2941             No Diggity  Campsite Dream  0.123341
                               track_name             track_artist  distance
15844  Boy You Knock Me Out - Single Edit              Tatyana Ali  0.137060
4988   They Reminisce Over You (T.R.O.Y.)  Pete Rock & C.L. Smooth  0.153421
16678  They Reminisce Over You (T.R.O.Y.)  Pete Rock & C.L. Smooth  0.153421
15059                      My Prerogative              Bobby Brown  0.154499
15735                             Uhh Ahh              Boyz II Men  0.156093
