In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_distances, euclidean_distances
from scipy.sparse import lil_matrix 
import scipy.spatial.distance

In [3]:
songs = pd.read_csv("music.csv")

songs.rename(columns = {'Unnamed: 0':'song_id'}, inplace = True)
songs

qualities = ['song_id','acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 
             'mode', 'speechiness', 'tempo', 'time_signature', 'valence', 'target']
details = ['song_title', 'artist']
columns = details + qualities

songs_df = songs.loc[:, columns]
songs_df.set_index('song_title', inplace=True)
songs_df = songs_df.fillna(0)

songs_df

Unnamed: 0_level_0,artist,song_id,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,target
song_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Mask Off,Future,0,0.01020,0.833,204600,0.434,0.021900,2,0.1650,-8.795,1,0.4310,150.062,4,0.286,1
Redbone,Childish Gambino,1,0.19900,0.743,326933,0.359,0.006110,1,0.1370,-10.401,1,0.0794,160.083,4,0.588,1
Xanny Family,Future,2,0.03440,0.838,185707,0.412,0.000234,2,0.1590,-7.148,1,0.2890,75.044,4,0.173,1
Master Of None,Beach House,3,0.60400,0.494,199413,0.338,0.510000,5,0.0922,-15.236,1,0.0261,86.468,4,0.230,1
Parallel Lines,Junior Boys,4,0.18000,0.678,392893,0.561,0.512000,5,0.4390,-11.648,0,0.0694,174.004,4,0.904,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Like A Bitch - Kill The Noise Remix,Kill The Noise,2012,0.00106,0.584,274404,0.932,0.002690,1,0.1290,-3.501,1,0.3330,74.976,4,0.211,0
Candy,Dillon Francis,2013,0.08770,0.894,182182,0.892,0.001670,1,0.0528,-2.663,1,0.1310,110.041,4,0.867,0
Habit - Dack Janiels & Wenzday Remix,Rain Man,2014,0.00857,0.637,207200,0.935,0.003990,0,0.2140,-2.467,1,0.1070,150.082,4,0.470,0
First Contact,Twin Moons,2015,0.00164,0.557,185600,0.992,0.677000,1,0.0913,-2.735,1,0.1330,150.011,4,0.623,0


In [4]:
updatedsong_df = songs_df.drop(columns = ['artist', 'song_id', 'mode', 'target'])
updatedsong_df

Unnamed: 0_level_0,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,time_signature,valence
song_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Mask Off,0.01020,0.833,204600,0.434,0.021900,2,0.1650,-8.795,0.4310,150.062,4,0.286
Redbone,0.19900,0.743,326933,0.359,0.006110,1,0.1370,-10.401,0.0794,160.083,4,0.588
Xanny Family,0.03440,0.838,185707,0.412,0.000234,2,0.1590,-7.148,0.2890,75.044,4,0.173
Master Of None,0.60400,0.494,199413,0.338,0.510000,5,0.0922,-15.236,0.0261,86.468,4,0.230
Parallel Lines,0.18000,0.678,392893,0.561,0.512000,5,0.4390,-11.648,0.0694,174.004,4,0.904
...,...,...,...,...,...,...,...,...,...,...,...,...
Like A Bitch - Kill The Noise Remix,0.00106,0.584,274404,0.932,0.002690,1,0.1290,-3.501,0.3330,74.976,4,0.211
Candy,0.08770,0.894,182182,0.892,0.001670,1,0.0528,-2.663,0.1310,110.041,4,0.867
Habit - Dack Janiels & Wenzday Remix,0.00857,0.637,207200,0.935,0.003990,0,0.2140,-2.467,0.1070,150.082,4,0.470
First Contact,0.00164,0.557,185600,0.992,0.677000,1,0.0913,-2.735,0.1330,150.011,4,0.623


In [5]:
songs_df_range = (updatedsong_df.max(axis=0) - updatedsong_df.min(axis=0))
final_df = (updatedsong_df - updatedsong_df.min(axis=0)) / songs_df_range
final_df

Unnamed: 0_level_0,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,time_signature,valence
song_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Mask Off,0.010248,0.824826,0.190735,0.426363,0.022439,0.181818,0.153862,0.741141,0.514441,0.596033,0.75,0.262432
Redbone,0.199998,0.720418,0.314481,0.350081,0.006260,0.090909,0.124395,0.692162,0.071005,0.654474,0.75,0.577936
Xanny Family,0.034570,0.830626,0.171624,0.403987,0.000240,0.181818,0.147548,0.791369,0.335351,0.158539,0.75,0.144379
Master Of None,0.607034,0.431555,0.185488,0.328723,0.522541,0.454545,0.077247,0.544709,0.003784,0.225162,0.75,0.203928
Parallel Lines,0.180902,0.645012,0.381202,0.555533,0.524590,0.454545,0.442223,0.654132,0.058393,0.735659,0.75,0.908065
...,...,...,...,...,...,...,...,...,...,...,...,...
Like A Bitch - Kill The Noise Remix,0.001062,0.535963,0.261345,0.932872,0.002756,0.090909,0.115976,0.902592,0.390844,0.158142,0.75,0.184079
Candy,0.088138,0.895592,0.168058,0.892189,0.001711,0.090909,0.035782,0.928149,0.136083,0.362636,0.75,0.869411
Habit - Dack Janiels & Wenzday Remix,0.008610,0.597448,0.193365,0.935924,0.004088,0.000000,0.205430,0.934126,0.105814,0.596150,0.75,0.454659
First Contact,0.001645,0.504640,0.171516,0.993897,0.693648,0.090909,0.076300,0.925953,0.138605,0.595736,0.75,0.614501


In [7]:
target_song1 = 'Candy'
song_info = final_df.loc[target_song1]

distances = scipy.spatial.distance.cdist(final_df, [song_info], metric='euclidean')[:,0]
distances_from_query = list(zip(final_df.index, distances))

print("Top 10 similar songs to " + target_song1 + ": " + "\n")
for similar_song, similar_info in sorted(distances_from_query, key=lambda x: x[1], reverse=False)[:11]:
        artist = songs_df._get_value(similar_song, "artist", takeable=False) 
        print(similar_song + ",", artist, similar_info)
print('\n')

Top 10 similar songs to Candy: 

Candy, Dillon Francis 0.0
My Favorite Snack, Imagination Movers 0.21266182139196668
Love You Like A Love Song, Selena Gomez & The Scene 0.23136436146783437
Icarus - Black Caviar Remix, R3hab 0.25732960155190066
Ain't Nobody, Chaka Khan 0.2766731434271509
Al Filo de Tu Amor, Carlos Vives 0.2792919708941021
Wearing My Rolex - Radio Edit, Wiley 0.2835522489388007
As Long as You Love Me, Backstreet Boys 0.2842465166549455
The Way We Are, Kate Boy 0.2873375720363869
Classic, MKTO 0.28774798557708586
Ay Mi Dios, IAmChino 0.2967654574347541




In [8]:
target_song2 = 'Xanny Family'
song_info = final_df.loc[target_song2]

distances = scipy.spatial.distance.cdist(final_df, [song_info], metric='euclidean')[:,0]
distances_from_query = list(zip(final_df.index, distances))

print("Top 10 similar songs to " + target_song2 + ": " + "\n")
for similar_song, similar_info in sorted(distances_from_query, key=lambda x: x[1], reverse=False)[:11]:
        artist = songs_df._get_value(similar_song, "artist", takeable=False) 
        print(similar_song + ",", artist, similar_info)
print('\n')

Top 10 similar songs to Xanny Family: 

Xanny Family, Future 0.0
(Bonus) Break Her Down, Iamsu! 0.3087131884793907
Childs Play, Drake 0.3089956472094203
Function - feat. YG, IAmSu & Problem, E-40 0.32412529544813795
Dope, Tyga 0.34047967362029624
Slow Down, ['Poolside' 'Clyde Carson'] 0.38972854607248414
I Think She Like Me, Rick Ross 0.39545858164564074
$ave Dat Money (feat. Fetty Wap & Rich Homie Quan), Lil Dicky 0.41669424333406435
Sitting Pretty (feat. Wiz Khalifa), Ty Dolla $ign 0.4169277673616483
Smell Yo D*ck, Riskay 0.4248207466855811
Wu-Tang Forever, Drake 0.43574099385444587




In [9]:
target_song3 = 'First Contact'
song_info = final_df.loc[target_song3]

distances = scipy.spatial.distance.cdist(final_df, [song_info], metric='euclidean')[:,0]
distances_from_query = list(zip(final_df.index, distances))

print("Top 10 similar songs to " + target_song3 + ": " + "\n")
for similar_song, similar_info in sorted(distances_from_query, key=lambda x: x[1], reverse=False)[:11]:
        artist = songs_df._get_value(similar_song, "artist", takeable=False) 
        print(similar_song + ",", artist, similar_info)
print('\n')

Top 10 similar songs to First Contact: 

First Contact, Twin Moons 0.0
Disparate Youth, Santigold 0.2369116580137685
Awake, ['Tycho' 'Santé'] 0.3502211267390295
Feels Like We Only Go Backwards, Tame Impala 0.37325052789785124
New York City, Christopher Owens 0.39334030444672075
You, ['Ejeca' 'Galantis'] 0.4083903822642204
Levels - Radio Edit, Avicii 0.4154399331897071
Rock The House - Radio Edit, Afrojack 0.4161974536218591
Double Bubble Trouble, M.I.A. 0.4270163657869402
The Jam, Kideko 0.4279300644017457
When A Fire Starts To Burn - Midland Remix, Disclosure 0.4402585396176763


