In [82]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import ndcg_score
import matplotlib.pyplot as plt

In [83]:
# THIS IS WITH ONE HOT ENCODING

data_original = pd.read_csv('data.csv')
data_original.head()

Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo
0,0.0594,1921,0.982,"['Sergei Rachmaninoff', 'James Levine', 'Berli...",0.279,831667,0.211,0,4BJqT0PrAfrxzMOxytFOIz,0.878,10,0.665,-20.096,1,"Piano Concerto No. 3 in D Minor, Op. 30: III. ...",4,1921,0.0366,80.954
1,0.963,1921,0.732,['Dennis Day'],0.819,180533,0.341,0,7xPhfUan2yNtyFG0cUWkt8,0.0,7,0.16,-12.441,1,Clancy Lowered the Boom,5,1921,0.415,60.936
2,0.0394,1921,0.961,['KHP Kridhamardawa Karaton Ngayogyakarta Hadi...,0.328,500062,0.166,0,1o6I8BglA6ylDMrIELygv1,0.913,3,0.101,-14.85,1,Gati Bali,5,1921,0.0339,110.339
3,0.165,1921,0.967,['Frank Parker'],0.275,210000,0.309,0,3ftBPsC5vPBKxYSee08FDH,2.8e-05,5,0.381,-9.316,1,Danny Boy,3,1921,0.0354,100.109
4,0.253,1921,0.957,['Phil Regan'],0.418,166693,0.193,0,4d6HGyGT8e121BsdKmw9v6,2e-06,3,0.229,-10.096,1,When Irish Eyes Are Smiling,2,1921,0.038,101.665


In [84]:
# Standardize some relevent columns from the data -> Converts to Numpy Array first

num_col_names = ['valence', 'acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'speechiness']
data_numOnly = data_original[num_col_names]

np_data = np.array(data_numOnly)

data_mean = np_data.mean(axis=0)
data_std = np_data.std(axis=0)

standardized_data = (np_data - data_mean)/(data_std)

In [85]:
# Copy the standardized data back into a dataframe

data = data_original.copy()

data[num_col_names] = standardized_data

data.head()

Unnamed: 0,valence,year,acousticness,artists,danceability,duration_ms,energy,explicit,id,instrumentalness,key,liveness,loudness,mode,name,popularity,release_date,speechiness,tempo
0,-1.782825,1921,1.276187,"['Sergei Rachmaninoff', 'James Levine', 'Berli...",-1.467013,831667,-1.013988,0,4BJqT0PrAfrxzMOxytFOIz,2.268102,10,2.626719,-20.096,1,"Piano Concerto No. 3 in D Minor, Op. 30: III. ...",4,1921,-0.379706,80.954
1,1.650688,1921,0.611347,['Dennis Day'],1.598779,180533,-0.52827,0,7xPhfUan2yNtyFG0cUWkt8,-0.532771,7,-0.262229,-12.441,1,Clancy Lowered the Boom,5,1921,1.945481,60.936
2,-1.858821,1921,1.22034,['KHP Kridhamardawa Karaton Ngayogyakarta Hadi...,-1.18882,500062,-1.182122,0,1o6I8BglA6ylDMrIELygv1,2.379754,3,-0.599749,-14.85,1,Gati Bali,5,1921,-0.396297,110.339
3,-1.381564,1921,1.236296,['Frank Parker'],-1.489722,210000,-0.647832,0,3ftBPsC5vPBKxYSee08FDH,-0.532682,5,1.002043,-9.316,1,Danny Boy,3,1921,-0.38708,100.109
4,-1.04718,1921,1.209703,['Phil Regan'],-0.677855,166693,-1.081242,0,4d6HGyGT8e121BsdKmw9v6,-0.532765,3,0.132499,-10.096,1,When Irish Eyes Are Smiling,2,1921,-0.371104,101.665


## NDCG Test Items:

In [86]:
def calculate_avg_ndcg(recommended_sim_list, ideal_score_list):
    
    total_ndcg_score = 0
    i = 0
    
    for recommended_sim_songs in recommended_sim_list:
        scores = np.asarray([recommended_sim_songs['true_relevence']])
        ideal_scores = np.asarray([ideal_score_list[i]])
        
        total_ndcg_score += ndcg_score(ideal_scores, scores)
        i += 1
    
    avg_ndcg_score = total_ndcg_score/i
    
    return avg_ndcg_score

### Test Data 1: Input Song is Street Lights

In [87]:
# List that shows what features the model is going to use

selected_features = ['year', 'valence', 'acousticness', 'danceability', 'artists', 'energy', 'explicit', 'instrumentalness', 'liveness', 'mode', 'speechiness', 'name']

In [88]:
# Test Data 1 - Input Song is Street Lights by Kanye West

inputSong1 = data.loc[data['id'] == "6j8gTlbhj9KJSeypNcNAS9"]
inputSong1 = inputSong1[selected_features]

# The "database" that the System will choose from
# Intentionally choosing 5 songs that are "similarish" to the input song

ModelPlaylist1 = data.loc[data['id'].isin([ 
                                          "7Cu2COdH93MnuireuKNiS3", 
                                          "02LAK7qT1wya0klSeNO96f", 
                                          "4jQqM4NI79HEcWHUJb8Hvf", 
                                          "4cAgkb0ifwn0FSHGXnr4F6",
                                          "4rwpZEcnalkuhPyGkEdhu0","5i7fZq3chLyCHo3VeB6goD", "6I9sncEmtGc9rpKyb8U1f8"])]

ModelPlaylist1 = ModelPlaylist1[selected_features]
ModelPlaylist1['true_relevence'] = [1,3,0,0,0,2,0] # Higher score means more relevence

ModelPlaylist1

Unnamed: 0,year,valence,acousticness,danceability,artists,energy,explicit,instrumentalness,liveness,mode,speechiness,name,true_relevence
17048,2007,-1.537357,-0.960333,0.026141,['Kanye West'],-0.061233,1,-0.531351,-0.462453,1,-0.093974,I Wonder,1
18725,2015,-1.46896,0.582094,-0.723274,['Daniel Caesar'],-0.688931,0,-0.532771,-0.27367,1,-0.313957,Streetcar,3
36527,2010,-0.777394,-1.326367,-0.081729,['Skrillex'],1.724715,0,1.244093,-0.496777,0,-0.090287,Scary Monsters and Nice Sprites,0
36779,2011,0.081365,-1.316395,-0.615404,['Skrillex'],1.73966,0,-0.485558,-0.713019,0,0.790261,First of the Year (Equinox),0
37189,2013,-0.6444,-0.896509,-0.178245,['Kanye West'],0.402067,1,-0.532736,-0.628353,1,-0.137602,New Slaves,0
54820,2008,-0.462009,-0.917784,0.911814,['Kanye West'],-0.132223,0,-0.514779,1.13934,0,0.022163,RoboCop,2
73157,2010,0.38155,-1.329479,1.42278,['Skrillex'],1.463174,0,-0.525401,-0.606614,1,-0.10995,Kill EVERYBODY,0


### Test Data 2: Input Song is Ultralight Beam

In [89]:
# Test Data 2 - Input Song is Ultralight Beam by Kanye West

inputSong2 = data.loc[data['id'] == "1eQBEelI2NCy7AUTerX0KS"]
inputSong2 = inputSong2[selected_features]

# The "database" that the System will choose from
ModelPlaylist2 = data.loc[data['id'].isin([
                                           "3ZLyt2ndLFBh148XRYjYYZ", 
                                           "0oPOuDmmkVp3h6puekhs6P", 
                                           "2kH3RZN9KdGBj1c1jL3GjO", 
                                           "722tgOgdIbNe3BEyLnejw4", 
                                           "4cAgkb0ifwn0FSHGXnr4F6",
                                           "4rwpZEcnalkuhPyGkEdhu0","5i7fZq3chLyCHo3VeB6goD", "6I9sncEmtGc9rpKyb8U1f8"])]

ModelPlaylist2 = ModelPlaylist2[selected_features]
ModelPlaylist2['true_relevence'] = [0,0,0,0,3,0,3,2] # Higher score means more relevence

ModelPlaylist2

Unnamed: 0,year,valence,acousticness,danceability,artists,energy,explicit,instrumentalness,liveness,mode,speechiness,name,true_relevence
18255,2013,-0.773594,-1.332378,1.297877,['Kanye West'],1.220315,1,-0.532771,-0.216463,1,1.10979,Black Skinhead,0
36527,2010,-0.777394,-1.326367,-0.081729,['Skrillex'],1.724715,0,1.244093,-0.496777,0,-0.090287,Scary Monsters and Nice Sprites,0
36779,2011,0.081365,-1.316395,-0.615404,['Skrillex'],1.73966,0,-0.485558,-0.713019,0,0.790261,First of the Year (Equinox),0
37189,2013,-0.6444,-0.896509,-0.178245,['Kanye West'],0.402067,1,-0.532736,-0.628353,1,-0.137602,New Slaves,0
56506,2016,-0.496207,-0.516221,-0.03631,"['Chance the Rapper', 'Kanye West', ""Chicago C...",0.230197,0,-0.532765,-0.699861,0,2.234286,All We Got (feat. Kanye West & Chicago Childre...,3
73157,2010,0.38155,-1.329479,1.42278,['Skrillex'],1.463174,0,-0.525401,-0.606614,1,-0.10995,Kill EVERYBODY,0
124601,2019,-1.335966,0.297543,0.417881,"['Kanye West', 'Clipse', 'Kenny G']",-0.087388,0,-0.532771,-0.456732,0,-0.345296,Use This Gospel,3
152590,2004,0.609539,0.18319,0.48601,['Kanye West'],1.332404,0,-0.532771,0.498623,1,1.798006,Jesus Walks,2


### Test Data 3: Input Song is Diles

In [90]:
# Test Data 3 - Input Song is diles

inputSong3 = data.loc[data['id'] == "6C1RD7YQVvt3YQj0CmuTeu"]
inputSong3 = inputSong3[selected_features]

# The "database" that the System will choose from
ModelPlaylist3 = data.loc[data['id'].isin([
                                           "1RijmOnza5p9CzaFKr8W1Y", 
                                           "278kSqsZIiYp8p3QjYAqa8", 
                                           "42FWqCxAw5aG1FvjyVjIlH", 
                                           "0EhpEsp4L0oRGM0vmeaN5e", 
                                           "2iIRl5jf9beA2ExZWzn2nt"])]

ModelPlaylist3 = ModelPlaylist3[selected_features]
ModelPlaylist3['true_relevence'] = [2,0,4,4,4] # Higher score means more relevence

ModelPlaylist3

Unnamed: 0,year,valence,acousticness,danceability,artists,energy,explicit,instrumentalness,liveness,mode,speechiness,name,true_relevence
19672,2020,-0.515206,-1.321049,0.190786,"['J Balvin', 'Tainy', 'Dua Lipa', 'Bad Bunny']",0.786905,0,-0.532771,-0.18786,0,-0.269715,UN DIA (ONE DAY) (Feat. Tainy),2
27927,1967,-0.367014,-1.066708,-1.171788,['The Beatles'],0.144263,0,-0.532771,-0.296553,0,-0.371718,Lucy In The Sky With Diamonds - Remix,0
38078,2018,-0.727996,0.1433,1.638521,['Bad Bunny'],-0.12475,1,-0.532712,0.384209,0,-0.190447,NI BIEN NI MAL,4
140205,2017,-0.990183,-0.7662,1.621488,"['Bad Bunny', 'J Balvin', 'Ozuna', 'Arcangel']",-0.09486,1,-0.532771,0.836143,0,0.126624,Soy Peor - Remix,4
155116,2018,-1.408163,-0.2689,2.160841,"['Darell', 'Casper Magico', 'Nio Garcia', 'Nic...",0.446902,0,-0.532747,-0.717023,0,0.102045,Te Boté,4


### Test Data 4: Input Song is Piano Man

In [91]:
# Test Data 4 - Input Song is Piano Man

inputSong4 = data.loc[data['id'] == "70C4NyhjD5OZUMzvWZ3njJ"]
inputSong4 = inputSong4[selected_features]

# The "database" that the System will choose from
ModelPlaylist4 = data.loc[data['id'].isin([
                                           "4U45aEWtQhrm8A5mxPaFZ7", 
                                           "1xOXXYh6lTW8laxlW7JP2J", 
                                           "6ByRaaLxtbMFyIzQob2nDT", 
                                           "3utq2FgD1pkmIoaWfjXWAU", 
                                           "4rwpZEcnalkuhPyGkEdhu0","5i7fZq3chLyCHo3VeB6goD", "6I9sncEmtGc9rpKyb8U1f8"])]

ModelPlaylist4 = ModelPlaylist4[selected_features]
ModelPlaylist4['true_relevence'] = [4,3,1,2, 0, 0, 0] #Higher score means more relevence

ModelPlaylist4

Unnamed: 0,year,valence,acousticness,danceability,artists,energy,explicit,instrumentalness,liveness,mode,speechiness,name,true_relevence
11012,1977,-0.838191,0.417214,-0.030633,['Billy Joel'],0.047119,0,-0.532771,-0.746199,1,-0.393839,Vienna,4
11027,1977,0.883126,-0.912465,0.236205,['Billy Joel'],1.455702,0,-0.532771,-0.642654,1,0.372415,Only the Good Die Young,3
11063,1977,-0.720396,-0.252944,-0.694887,['Billy Joel'],1.676143,0,-0.531172,-0.52538,1,0.169637,Scenes from an Italian Restaurant,1
11154,1977,-1.164975,-0.896509,0.718783,['Billy Joel'],1.478119,0,-0.532746,-0.713019,0,-0.090287,The Stranger,2
36527,2010,-0.777394,-1.326367,-0.081729,['Skrillex'],1.724715,0,1.244093,-0.496777,0,-0.090287,Scary Monsters and Nice Sprites,0
36779,2011,0.081365,-1.316395,-0.615404,['Skrillex'],1.73966,0,-0.485558,-0.713019,0,0.790261,First of the Year (Equinox),0
73157,2010,0.38155,-1.329479,1.42278,['Skrillex'],1.463174,0,-0.525401,-0.606614,1,-0.10995,Kill EVERYBODY,0


## Getting Recommendations (Cosine Similarity)

In [151]:
def get_recommendations(inputSong, ModelPlaylist):
    # Not changing the raw data. Just using most of the given numerical columns (look at the original "data" variable)

    recommended_sim = ModelPlaylist.copy()
    nparray_sim = cosine_similarity(inputSong.drop(['name', 'artists', "true_relevance"], axis = 1).values, ModelPlaylist.drop(['name', 'artists', "true_relevance"], axis = 1).values)

    # Ordering the recommended songs from most to least
    recommended_sim['sim'] = nparray_sim.tolist()[0]
    recommended_sim_songs = recommended_sim.sort_values('sim',ascending = False)
    
    return recommended_sim_songs

In [177]:
#inputSongs: songs which you want recommendations for
#ModePlaylist: all songs we are comparing against
#n: how many top songs from each recommendation you want
#Returns: list of recommendations
def playlist_vector(inputSongs, ModelPlaylist, n):
    temp = None
    flag = False
    for i in range(0,inputSongs.shape[0]):
        recommendations = get_recommendations(inputSongs.iloc[[i]], ModelPlaylist)
        for j in range(0,n):
            if flag == False:
                flag = True;
                temp = recommendations.iloc[[j]]
            else:
                temp = temp.append(recommendations.iloc[j], ignore_index=True)
    return temp

        

In [179]:

t = playlist_vector(ModelPlaylist1, ModelPlaylist2,2)
t

  temp = temp.append(recommendations.iloc[j], ignore_index=True)
  temp = temp.append(recommendations.iloc[j], ignore_index=True)
  temp = temp.append(recommendations.iloc[j], ignore_index=True)
  temp = temp.append(recommendations.iloc[j], ignore_index=True)
  temp = temp.append(recommendations.iloc[j], ignore_index=True)
  temp = temp.append(recommendations.iloc[j], ignore_index=True)
  temp = temp.append(recommendations.iloc[j], ignore_index=True)


Unnamed: 0,year,valence,acousticness,danceability,artists,energy,explicit,instrumentalness,liveness,mode,speechiness,name,true_relevence,sim
0,2013,-0.6444,-0.896509,-0.178245,['Kanye West'],0.402067,1,-0.532736,-0.628353,1,-0.137602,New Slaves,0,1.0
1,2013,-0.773594,-1.332378,1.297877,['Kanye West'],1.220315,1,-0.532771,-0.216463,1,1.10979,Black Skinhead,0,0.999999
2,2019,-1.335966,0.297543,0.417881,"['Kanye West', 'Clipse', 'Kenny G']",-0.087388,0,-0.532771,-0.456732,0,-0.345296,Use This Gospel,3,1.0
3,2016,-0.496207,-0.516221,-0.03631,"['Chance the Rapper', 'Kanye West', ""Chicago C...",0.230197,0,-0.532765,-0.699861,0,2.234286,All We Got (feat. Kanye West & Chicago Childre...,3,0.999999
4,2010,-0.777394,-1.326367,-0.081729,['Skrillex'],1.724715,0,1.244093,-0.496777,0,-0.090287,Scary Monsters and Nice Sprites,0,1.0
5,2011,0.081365,-1.316395,-0.615404,['Skrillex'],1.73966,0,-0.485558,-0.713019,0,0.790261,First of the Year (Equinox),0,0.999999
6,2011,0.081365,-1.316395,-0.615404,['Skrillex'],1.73966,0,-0.485558,-0.713019,0,0.790261,First of the Year (Equinox),0,1.0
7,2010,-0.777394,-1.326367,-0.081729,['Skrillex'],1.724715,0,1.244093,-0.496777,0,-0.090287,Scary Monsters and Nice Sprites,0,0.999999
8,2013,-0.6444,-0.896509,-0.178245,['Kanye West'],0.402067,1,-0.532736,-0.628353,1,-0.137602,New Slaves,0,1.0
9,2013,-0.773594,-1.332378,1.297877,['Kanye West'],1.220315,1,-0.532771,-0.216463,1,1.10979,Black Skinhead,0,0.999999


In [136]:
ModelPlaylist1

Unnamed: 0,year,valence,acousticness,danceability,artists,energy,explicit,instrumentalness,liveness,mode,speechiness,name,true_relevence
17048,2007,-1.537357,-0.960333,0.026141,['Kanye West'],-0.061233,1,-0.531351,-0.462453,1,-0.093974,I Wonder,1
18725,2015,-1.46896,0.582094,-0.723274,['Daniel Caesar'],-0.688931,0,-0.532771,-0.27367,1,-0.313957,Streetcar,3
36527,2010,-0.777394,-1.326367,-0.081729,['Skrillex'],1.724715,0,1.244093,-0.496777,0,-0.090287,Scary Monsters and Nice Sprites,0
36779,2011,0.081365,-1.316395,-0.615404,['Skrillex'],1.73966,0,-0.485558,-0.713019,0,0.790261,First of the Year (Equinox),0
37189,2013,-0.6444,-0.896509,-0.178245,['Kanye West'],0.402067,1,-0.532736,-0.628353,1,-0.137602,New Slaves,0
54820,2008,-0.462009,-0.917784,0.911814,['Kanye West'],-0.132223,0,-0.514779,1.13934,0,0.022163,RoboCop,2
73157,2010,0.38155,-1.329479,1.42278,['Skrillex'],1.463174,0,-0.525401,-0.606614,1,-0.10995,Kill EVERYBODY,0


In [137]:
ModelPlaylist2

Unnamed: 0,year,valence,acousticness,danceability,artists,energy,explicit,instrumentalness,liveness,mode,speechiness,name,true_relevence
18255,2013,-0.773594,-1.332378,1.297877,['Kanye West'],1.220315,1,-0.532771,-0.216463,1,1.10979,Black Skinhead,0
36527,2010,-0.777394,-1.326367,-0.081729,['Skrillex'],1.724715,0,1.244093,-0.496777,0,-0.090287,Scary Monsters and Nice Sprites,0
36779,2011,0.081365,-1.316395,-0.615404,['Skrillex'],1.73966,0,-0.485558,-0.713019,0,0.790261,First of the Year (Equinox),0
37189,2013,-0.6444,-0.896509,-0.178245,['Kanye West'],0.402067,1,-0.532736,-0.628353,1,-0.137602,New Slaves,0
56506,2016,-0.496207,-0.516221,-0.03631,"['Chance the Rapper', 'Kanye West', ""Chicago C...",0.230197,0,-0.532765,-0.699861,0,2.234286,All We Got (feat. Kanye West & Chicago Childre...,3
73157,2010,0.38155,-1.329479,1.42278,['Skrillex'],1.463174,0,-0.525401,-0.606614,1,-0.10995,Kill EVERYBODY,0
124601,2019,-1.335966,0.297543,0.417881,"['Kanye West', 'Clipse', 'Kenny G']",-0.087388,0,-0.532771,-0.456732,0,-0.345296,Use This Gospel,3
152590,2004,0.609539,0.18319,0.48601,['Kanye West'],1.332404,0,-0.532771,0.498623,1,1.798006,Jesus Walks,2


In [None]:
# Get the recommended songs for our test data
song1_recommended = get_recommendations(inputSong1, ModelPlaylist1)
song2_recommended = get_recommendations(inputSong2, ModelPlaylist2)
song3_recommended = get_recommendations(inputSong3, ModelPlaylist3)
song4_recommended = get_recommendations(inputSong4, ModelPlaylist4)


In [None]:
# The best score == num of input songs. So if we recommending songs for 2 songs, a perfect score is 2
calculate_avg_ndcg([song1_recommended, song2_recommended, song3_recommended, song4_recommended],
               [[3, 2, 1, 0, 0, 0, 0], [3, 3, 2, 0, 0, 0, 0, 0], [4, 4, 4, 2, 0], [4, 3, 2, 1, 0, 0, 0]])

0.8361702005872358

In [None]:
display("TEST DATA 1 OUTPUT:", song1_recommended)
display("TEST DATA 2 OUTPUT:", song2_recommended)
display("TEST DATA 3 OUTPUT:", song3_recommended)
display("TEST DATA 4 OUTPUT:", song4_recommended)

'TEST DATA 1 OUTPUT:'

Unnamed: 0,year,valence,acousticness,danceability,artists,energy,explicit,instrumentalness,liveness,mode,speechiness,name,true_relevence,sim
37189,2013,-0.6444,-0.896509,-0.178245,['Kanye West'],0.402067,1,-0.532736,-0.628353,1,-0.137602,New Slaves,0,1.0
54820,2008,-0.462009,-0.917784,0.911814,['Kanye West'],-0.132223,0,-0.514779,1.13934,0,0.022163,RoboCop,2,1.0
17048,2007,-1.537357,-0.960333,0.026141,['Kanye West'],-0.061233,1,-0.531351,-0.462453,1,-0.093974,I Wonder,1,1.0
73157,2010,0.38155,-1.329479,1.42278,['Skrillex'],1.463174,0,-0.525401,-0.606614,1,-0.10995,Kill EVERYBODY,0,1.0
18725,2015,-1.46896,0.582094,-0.723274,['Daniel Caesar'],-0.688931,0,-0.532771,-0.27367,1,-0.313957,Streetcar,3,0.999999
36779,2011,0.081365,-1.316395,-0.615404,['Skrillex'],1.73966,0,-0.485558,-0.713019,0,0.790261,First of the Year (Equinox),0,0.999999
36527,2010,-0.777394,-1.326367,-0.081729,['Skrillex'],1.724715,0,1.244093,-0.496777,0,-0.090287,Scary Monsters and Nice Sprites,0,0.999999


'TEST DATA 2 OUTPUT:'

Unnamed: 0,year,valence,acousticness,danceability,artists,energy,explicit,instrumentalness,liveness,mode,speechiness,name,true_relevence,sim
152590,2004,0.609539,0.18319,0.48601,['Kanye West'],1.332404,0,-0.532771,0.498623,1,1.798006,Jesus Walks,2,0.999999
56506,2016,-0.496207,-0.516221,-0.03631,"['Chance the Rapper', 'Kanye West', ""Chicago C...",0.230197,0,-0.532765,-0.699861,0,2.234286,All We Got (feat. Kanye West & Chicago Childre...,3,0.999999
18255,2013,-0.773594,-1.332378,1.297877,['Kanye West'],1.220315,1,-0.532771,-0.216463,1,1.10979,Black Skinhead,0,0.999998
124601,2019,-1.335966,0.297543,0.417881,"['Kanye West', 'Clipse', 'Kenny G']",-0.087388,0,-0.532771,-0.456732,0,-0.345296,Use This Gospel,3,0.999998
37189,2013,-0.6444,-0.896509,-0.178245,['Kanye West'],0.402067,1,-0.532736,-0.628353,1,-0.137602,New Slaves,0,0.999998
36779,2011,0.081365,-1.316395,-0.615404,['Skrillex'],1.73966,0,-0.485558,-0.713019,0,0.790261,First of the Year (Equinox),0,0.999998
73157,2010,0.38155,-1.329479,1.42278,['Skrillex'],1.463174,0,-0.525401,-0.606614,1,-0.10995,Kill EVERYBODY,0,0.999997
36527,2010,-0.777394,-1.326367,-0.081729,['Skrillex'],1.724715,0,1.244093,-0.496777,0,-0.090287,Scary Monsters and Nice Sprites,0,0.999997


'TEST DATA 3 OUTPUT:'

Unnamed: 0,year,valence,acousticness,danceability,artists,energy,explicit,instrumentalness,liveness,mode,speechiness,name,true_relevence,sim
140205,2017,-0.990183,-0.7662,1.621488,"['Bad Bunny', 'J Balvin', 'Ozuna', 'Arcangel']",-0.09486,1,-0.532771,0.836143,0,0.126624,Soy Peor - Remix,4,1.0
38078,2018,-0.727996,0.1433,1.638521,['Bad Bunny'],-0.12475,1,-0.532712,0.384209,0,-0.190447,NI BIEN NI MAL,4,1.0
155116,2018,-1.408163,-0.2689,2.160841,"['Darell', 'Casper Magico', 'Nio Garcia', 'Nic...",0.446902,0,-0.532747,-0.717023,0,0.102045,Te Boté,4,1.0
19672,2020,-0.515206,-1.321049,0.190786,"['J Balvin', 'Tainy', 'Dua Lipa', 'Bad Bunny']",0.786905,0,-0.532771,-0.18786,0,-0.269715,UN DIA (ONE DAY) (Feat. Tainy),2,1.0
27927,1967,-0.367014,-1.066708,-1.171788,['The Beatles'],0.144263,0,-0.532771,-0.296553,0,-0.371718,Lucy In The Sky With Diamonds - Remix,0,0.999999


'TEST DATA 4 OUTPUT:'

Unnamed: 0,year,valence,acousticness,danceability,artists,energy,explicit,instrumentalness,liveness,mode,speechiness,name,true_relevence,sim
11012,1977,-0.838191,0.417214,-0.030633,['Billy Joel'],0.047119,0,-0.532771,-0.746199,1,-0.393839,Vienna,4,1.0
11063,1977,-0.720396,-0.252944,-0.694887,['Billy Joel'],1.676143,0,-0.531172,-0.52538,1,0.169637,Scenes from an Italian Restaurant,1,1.0
11027,1977,0.883126,-0.912465,0.236205,['Billy Joel'],1.455702,0,-0.532771,-0.642654,1,0.372415,Only the Good Die Young,3,0.999999
36779,2011,0.081365,-1.316395,-0.615404,['Skrillex'],1.73966,0,-0.485558,-0.713019,0,0.790261,First of the Year (Equinox),0,0.999999
11154,1977,-1.164975,-0.896509,0.718783,['Billy Joel'],1.478119,0,-0.532746,-0.713019,0,-0.090287,The Stranger,2,0.999999
36527,2010,-0.777394,-1.326367,-0.081729,['Skrillex'],1.724715,0,1.244093,-0.496777,0,-0.090287,Scary Monsters and Nice Sprites,0,0.999999
73157,2010,0.38155,-1.329479,1.42278,['Skrillex'],1.463174,0,-0.525401,-0.606614,1,-0.10995,Kill EVERYBODY,0,0.999999
