In [17]:
import pandas as pd
import numpy as np

# Read the track table and standardize the two numeric features in one chain.
# Each of tempo and energy has its column mean subtracted and is divided by its
# column standard deviation, so neither feature dominates the distance through
# its raw scale.
df = (
    pd.read_csv('data/spotify_dataset.csv')
    .assign(
        tempo=lambda frame: (frame['tempo'] - frame['tempo'].mean()) / frame['tempo'].std(),
        energy=lambda frame: (frame['energy'] - frame['energy'].mean()) / frame['energy'].std(),
    )
)


In [18]:
def calculate_similarity(song1, song2):
    """Return the Euclidean distance between two songs.

    Both arguments are indexed by the keys 'tempo', 'energy' and 'genre'
    (a DataFrame row or a dict both work). Despite the name, the result is a
    distance: smaller values mean the songs are more alike.

    The tempo and energy differences enter the distance directly; differing
    genres add one extra unit, matching genres add nothing.
    """
    squared_tempo_gap = (song1['tempo'] - song2['tempo'])**2
    squared_energy_gap = (song1['energy'] - song2['energy'])**2

    # Genre is categorical: a mismatch counts as a gap of exactly one unit.
    genre_gap = 0 if song1['genre'] == song2['genre'] else 1

    return np.sqrt(squared_tempo_gap + squared_energy_gap + genre_gap**2)


In [19]:
def find_similar_songs(song_name, top_n=5):
    """Return the songs in the dataset that are closest to the given song.

    The first row whose 'song' value equals ``song_name`` is used as the
    query. Every other row is scored with ``calculate_similarity`` (a
    distance: smaller means more alike) and the lowest-scoring rows are
    returned. The module-level ``df`` is read but not modified.

    Parameters
    ----------
    song_name : str
        Title to look up; compared for exact equality with the 'song' column.
    top_n : int, default 5
        Number of songs to return.

    Returns
    -------
    pandas.DataFrame
        The 'song', 'artist' and 'genre' columns of the closest rows, nearest
        first. Contains fewer than ``top_n`` rows when the dataset does not
        have that many other songs.

    Raises
    ------
    IndexError
        If no row has the given title.
    """
    columns = ['song', 'artist', 'genre']

    matches = df.loc[df['song'] == song_name]
    if matches.empty:
        # Same exception type a bare .iloc[0] on an empty frame raises, but
        # with a message that names the missing title.
        raise IndexError("No song named {!r} in the dataset".format(song_name))
    song = matches.iloc[0]

    # Remove the query row by its index label instead of assuming it ends up
    # first after sorting: another row at distance 0 could otherwise take its
    # place and the query song would be recommended to itself.
    # Other rows that merely share the title are kept as candidates.
    candidates = df.drop(index=song.name)
    if candidates.empty:
        return candidates[columns]

    distances = candidates.apply(lambda row: calculate_similarity(row, song), axis=1)

    # assign() works on a copy, so the shared frame never gains a 'similarity'
    # column. mergesort is stable, which keeps ties in dataset order and makes
    # the result reproducible from run to run.
    ranked = candidates.assign(similarity=distances).sort_values(by='similarity', kind='mergesort')

    return ranked.head(top_n)[columns]


In [20]:
def similarity_search():
    """Prompt for a song title and print its five most similar songs.

    The prompt repeats until the entered text equals a value in the 'song'
    column of ``df``; the accepted title is then passed to
    ``find_similar_songs`` and the returned table is printed.
    """
    print("Welcome to Similarity Search:\nInsert a song name in the input window, we will find you the five most similar songs to it.\n")

    while True:
        song_name = input("Enter the name of the song: ")
        # Accept the title only if at least one row matches it exactly.
        if (df['song'] == song_name).any():
            break
        print("Your song is apparently not in the dataset. Try a valid song title.\n")

    recommendations = find_similar_songs(song_name)
    print("Your five most similar songs are:\n{}\n\nHave fun listening!".format(recommendations))


In [21]:
# Start the interactive prompt; it keeps asking until the entered title matches a value in df['song'].
similarity_search()


Welcome to Similarity Search:
Insert a song name in the input window, we will find you the five most similar songs to it.

Your song is apparently not in the dataset. Try a valid song title.

Your five most similar songs are:
                            song         artist genre
479              My Happy Ending  Avril Lavigne   pop
12                    Freestyler   Bomfunk MC's   pop
747                   Girlfriend  Avril Lavigne   pop
86              It's Gonna Be Me         *NSYNC   pop
1392  Here's to Never Growing Up  Avril Lavigne   pop

Have fun listening!
