In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from scipy.spatial.distance import euclidean

# Read the cleaned songs table from disk
songs_df = pd.read_csv("cleaned_data/songs_cleaned.csv")

# Audio attributes that take part in the similarity calculation
features = [
    'danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
    'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms',
]

# Min-max scale the selected attributes (0-1 range) so that no single
# attribute dominates the distance purely because of its units.
# The scaling is written into a copy; songs_df keeps the raw values.
scaler = MinMaxScaler().fit(songs_df[features])
songs_df_scaled = songs_df.copy()
songs_df_scaled[features] = scaler.transform(songs_df[features])

# Function that takes input song and outputs similar songs using Euclidean distance
def recommend_songs(song_name, top_n=5):
    """Return the `top_n` songs closest to `song_name` in scaled feature space.

    Distances are Euclidean, computed over the columns listed in the
    module-level `features` list of the module-level `songs_df_scaled` frame.

    Parameters
    ----------
    song_name : str
        Exact value to look up in the 'track_name' column. If several rows
        share this name, the first one is used as the reference vector and
        all of them are excluded from the candidates.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame or str
        A frame with columns 'track_name', 'track_artist' and 'distance',
        ordered from nearest to farthest, or the string
        "Song not found in dataset." when no row matches `song_name`.
    """
    # Compute the name mask once; it drives lookup, existence check and exclusion.
    name_matches = songs_df_scaled['track_name'] == song_name
    if not name_matches.any():
        return "Song not found in dataset."

    # Getting feature vector of the input song (first matching row)
    song_vector = songs_df_scaled.loc[name_matches, features].to_numpy(dtype=float)[0]

    # Taking current song out of the song df (so the system doesn't recommend the input song)
    other_songs_df_scaled = songs_df_scaled.loc[~name_matches].copy()

    # Computing distances to all other songs in one vectorised step
    feature_matrix = other_songs_df_scaled[features].to_numpy(dtype=float)
    other_songs_df_scaled['distance'] = np.linalg.norm(feature_matrix - song_vector, axis=1)

    # Getting top N closest songs. The input song has already been removed
    # above, so the ranking must start at position 0; skipping the first row
    # would throw away the single best match.
    recommendations = other_songs_df_scaled.sort_values(by='distance', kind='mergesort').head(top_n)

    return recommendations[['track_name', 'track_artist', 'distance']]


              track_name  track_artist  distance
11001  Hey There Delilah  Manuel Costa  0.082604
159               For Me        WAPLAN  0.084941
11222     September Song     JP Cooper  0.107912
16499             Famous     Selah Sue  0.109950
13176               Eres    Un Corazón  0.112126


In [None]:
# Example: ask for five songs similar to "Higher Love" and print the result
recommended_songs = recommend_songs(song_name="Higher Love", top_n=5)
print(recommended_songs)