# Using continous / categorical / transition matrices to get recommendations / Selective Box Cox

In [1]:
import pandas as pd
from sklearn.cluster import KMeans
import seaborn as sns
from matplotlib import pyplot as plt 
import math
from ipynb.fs.full.Utils import createKDeplot, loadDataFrames, dropFeaturesWithNoVariance
import numpy as np
from sklearn.preprocessing import StandardScaler
from scipy import stats
from ipynb.fs.full.Pipeline import removeConstantFeatures, applyBoxCoxAllFeatures, centerAndScale, applyEuclideanDistance, getIndexAndVector, applyBoxCoxFeatures,applyJaccard, getEuclideanJaccardDf 

In [2]:
transitionMatrices = pd.read_csv('./data/unified/07-transitionsClustered.csv')
transitionMatrices = transitionMatrices[['id','category']]

In [3]:
data = pd.read_csv("./data/unified/05-allDataContinousCategoricalTransition.csv")
data = data.drop(columns=['Unnamed: 0'])

In [4]:
encodedFeaturesNames = pd.read_csv("./data/unified/05-encodedFeatureNames.csv")
encodedFeaturesNames = encodedFeaturesNames['0'].tolist()
encodedFeaturesNames

['key_0',
 'key_1',
 'key_2',
 'key_3',
 'key_4',
 'key_5',
 'key_6',
 'key_7',
 'key_8',
 'key_9',
 'key_10',
 'key_11',
 'time_signature_0',
 'time_signature_1',
 'mode_0',
 'mode_1',
 'mood_1',
 'mood_2',
 'mood_3',
 'mood_4',
 'mood_5',
 'mood_6',
 'mood_7',
 'mood_8',
 'mood_9',
 'mood_10',
 'mood_11',
 'instrumentalness_cat_0',
 'instrumentalness_cat_1',
 'instrumentalness_cat_2']

In [5]:
data

Unnamed: 0,index,id,danceability,energy,speechiness,acousticness,liveness,valence,tempo,0,...,mood_5,mood_6,mood_7,mood_8,mood_9,mood_10,mood_11,instrumentalness_cat_0,instrumentalness_cat_1,instrumentalness_cat_2
0,0,23,0.494,0.756,0.0432,0.001400,0.1640,0.589,133.337,0.000000,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1,105,0.582,0.744,0.0336,0.000458,0.0405,0.674,133.344,0.185185,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
2,2,118,0.614,0.755,0.0276,0.016200,0.0831,0.645,92.622,0.000000,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,3,120,0.319,0.807,0.0388,0.001040,0.1310,0.601,118.097,0.029412,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,4,292,0.496,0.527,0.0339,0.195000,0.1380,0.283,155.932,0.013889,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5972,5974,6002496,0.509,0.537,0.0384,0.347000,0.0805,0.311,170.030,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
5973,5975,6002497,0.725,0.583,0.0427,0.300000,0.0665,0.762,79.984,0.000000,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
5974,5976,6002498,0.399,0.787,0.0499,0.019700,0.0685,0.572,117.089,0.000000,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
5975,5977,6002499,0.376,0.435,0.0264,0.513000,0.1380,0.304,80.002,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [6]:
df = pd.merge(data,transitionMatrices, on='id')

In [7]:
df = df.drop(columns=['index'])

In [8]:
df

Unnamed: 0,id,danceability,energy,speechiness,acousticness,liveness,valence,tempo,0,1,...,mood_6,mood_7,mood_8,mood_9,mood_10,mood_11,instrumentalness_cat_0,instrumentalness_cat_1,instrumentalness_cat_2,category
0,23,0.494,0.756,0.0432,0.001400,0.1640,0.589,133.337,0.000000,0.000000,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0
1,105,0.582,0.744,0.0336,0.000458,0.0405,0.674,133.344,0.185185,0.000000,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1
2,118,0.614,0.755,0.0276,0.016200,0.0831,0.645,92.622,0.000000,0.000000,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0
3,120,0.319,0.807,0.0388,0.001040,0.1310,0.601,118.097,0.029412,0.000000,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0
4,292,0.496,0.527,0.0339,0.195000,0.1380,0.283,155.932,0.013889,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5972,6002496,0.509,0.537,0.0384,0.347000,0.0805,0.311,170.030,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2
5973,6002497,0.725,0.583,0.0427,0.300000,0.0665,0.762,79.984,0.000000,0.000000,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0
5974,6002498,0.399,0.787,0.0499,0.019700,0.0685,0.572,117.089,0.000000,0.015385,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0
5975,6002499,0.376,0.435,0.0264,0.513000,0.1380,0.304,80.002,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0


In [9]:
originalSongId = 180849 # The Scientist Original
categoryOriginalSong = df[df.id == originalSongId].category.values[0]
df = df[df.category == categoryOriginalSong]

In [10]:
df

Unnamed: 0,id,danceability,energy,speechiness,acousticness,liveness,valence,tempo,0,1,...,mood_6,mood_7,mood_8,mood_9,mood_10,mood_11,instrumentalness_cat_0,instrumentalness_cat_1,instrumentalness_cat_2,category
9,469,0.664,0.787,0.0239,0.03800,0.1930,0.585,102.952,0.007634,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2
10,588,0.726,0.537,0.1140,0.31600,0.0707,0.882,96.889,0.101351,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,2
12,635,0.550,0.145,0.0332,0.91300,0.1180,0.138,103.918,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2
21,2063,0.805,0.765,0.0925,0.12600,0.0430,0.836,85.285,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,2
25,2535,0.543,0.912,0.0601,0.00162,0.0565,0.836,103.731,0.005682,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5964,6002488,0.760,0.838,0.0510,0.39000,0.1660,0.863,159.938,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,2
5966,6002490,0.893,0.745,0.0571,0.06420,0.0943,0.872,101.018,0.000000,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,2
5967,6002491,0.665,0.515,0.0442,0.45400,0.3670,0.805,93.082,0.000000,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,2
5972,6002496,0.509,0.537,0.0384,0.34700,0.0805,0.311,170.030,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2


In [11]:
df[(df.id > 5000000) & (df.id < 6000000)]

Unnamed: 0,id,danceability,energy,speechiness,acousticness,liveness,valence,tempo,0,1,...,mood_6,mood_7,mood_8,mood_9,mood_10,mood_11,instrumentalness_cat_0,instrumentalness_cat_1,instrumentalness_cat_2,category
3484,5000011,0.503,0.123,0.0333,0.879,0.114,0.28,145.843,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2
3490,5000020,0.493,0.27,0.0278,0.244,0.123,0.176,73.001,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2


In [12]:
df = df.drop(columns=['category'])

In [13]:
df.reset_index(drop=True, inplace=True)

In [14]:
df

Unnamed: 0,id,danceability,energy,speechiness,acousticness,liveness,valence,tempo,0,1,...,mood_5,mood_6,mood_7,mood_8,mood_9,mood_10,mood_11,instrumentalness_cat_0,instrumentalness_cat_1,instrumentalness_cat_2
0,469,0.664,0.787,0.0239,0.03800,0.1930,0.585,102.952,0.007634,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,588,0.726,0.537,0.1140,0.31600,0.0707,0.882,96.889,0.101351,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
2,635,0.550,0.145,0.0332,0.91300,0.1180,0.138,103.918,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,2063,0.805,0.765,0.0925,0.12600,0.0430,0.836,85.285,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
4,2535,0.543,0.912,0.0601,0.00162,0.0565,0.836,103.731,0.005682,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1225,6002488,0.760,0.838,0.0510,0.39000,0.1660,0.863,159.938,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
1226,6002490,0.893,0.745,0.0571,0.06420,0.0943,0.872,101.018,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1227,6002491,0.665,0.515,0.0442,0.45400,0.3670,0.805,93.082,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
1228,6002496,0.509,0.537,0.0384,0.34700,0.0805,0.311,170.030,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [15]:
## Removing unamed and other unnecessary columns
df = df.sort_values('id')
df.id = df.id.astype('int64')
# Saving idChords
ids = df.id
df = df.set_index('id')

In [16]:
ids

0           469
1           588
2           635
3          2063
4          2535
         ...   
1225    6002488
1226    6002490
1227    6002491
1228    6002496
1229    6002500
Name: id, Length: 1230, dtype: int64

## Pipeline

In [17]:
df.columns

Index(['danceability', 'energy', 'speechiness', 'acousticness', 'liveness',
       'valence', 'tempo', '0', '1', '2',
       ...
       'mood_5', 'mood_6', 'mood_7', 'mood_8', 'mood_9', 'mood_10', 'mood_11',
       'instrumentalness_cat_0', 'instrumentalness_cat_1',
       'instrumentalness_cat_2'],
      dtype='object', length=1074)

In [18]:
df = removeConstantFeatures(df)
df.columns

Index(['danceability', 'energy', 'speechiness', 'acousticness', 'liveness',
       'valence', 'tempo', '0', '1', '2',
       ...
       'mood_5', 'mood_6', 'mood_7', 'mood_8', 'mood_9', 'mood_10', 'mood_11',
       'instrumentalness_cat_0', 'instrumentalness_cat_1',
       'instrumentalness_cat_2'],
      dtype='object', length=682)

In [19]:
dfEuclidean, dfJaccard = getEuclideanJaccardDf(df)

In [20]:
dfEuclidean = applyBoxCoxFeatures(dfEuclidean, ['speechiness', 'acousticness', 'liveness', 'valence'])
dfEuclidean = centerAndScale(dfEuclidean)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  "    return df"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ]


In [21]:
dfEuclidean

Unnamed: 0,0,1,10,1000,1001,1002,1004,1005,1007,1008,...,995,998,999,acousticness,danceability,energy,liveness,speechiness,tempo,valence
0,-0.399148,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,-1.041044,0.762722,0.915656,0.595037,-1.485852,-0.554844,0.576238
1,0.822660,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,0.203990,1.171472,-0.173322,-1.213839,1.914989,-0.760605,1.630632
2,-0.498668,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,1.509879,0.011151,-1.880839,-0.312821,-0.579255,-0.522061,-1.508342
3,-0.498668,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,-0.568370,1.692298,0.819826,-1.920314,1.697455,-1.154412,1.479804
4,-0.424594,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,-1.265268,-0.034998,1.460145,-1.556377,0.947522,-0.528407,1.479804
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1225,-0.498668,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,0.438591,1.395625,1.137807,0.323884,0.565536,1.379100,1.568829
1226,-0.498668,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,0.504533,...,-0.039535,-0.082602,-0.103825,-0.890681,2.272458,0.732708,-0.723078,0.833210,-0.620479,1.598190
1227,-0.498668,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,0.619016,0.769315,-0.269152,1.522277,0.201117,-0.889804,1.375800
1228,-0.498668,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,0.305963,-0.259151,-0.173322,-0.998971,-0.176846,1.721594,-0.609672


In [22]:
dfEuclidean

Unnamed: 0,0,1,10,1000,1001,1002,1004,1005,1007,1008,...,995,998,999,acousticness,danceability,energy,liveness,speechiness,tempo,valence
0,-0.399148,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,-1.041044,0.762722,0.915656,0.595037,-1.485852,-0.554844,0.576238
1,0.822660,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,0.203990,1.171472,-0.173322,-1.213839,1.914989,-0.760605,1.630632
2,-0.498668,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,1.509879,0.011151,-1.880839,-0.312821,-0.579255,-0.522061,-1.508342
3,-0.498668,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,-0.568370,1.692298,0.819826,-1.920314,1.697455,-1.154412,1.479804
4,-0.424594,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,-1.265268,-0.034998,1.460145,-1.556377,0.947522,-0.528407,1.479804
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1225,-0.498668,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,0.438591,1.395625,1.137807,0.323884,0.565536,1.379100,1.568829
1226,-0.498668,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,0.504533,...,-0.039535,-0.082602,-0.103825,-0.890681,2.272458,0.732708,-0.723078,0.833210,-0.620479,1.598190
1227,-0.498668,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,0.619016,0.769315,-0.269152,1.522277,0.201117,-0.889804,1.375800
1228,-0.498668,-0.116383,-0.040316,-0.10906,-0.028525,-0.028525,-0.089443,-0.054826,-0.06907,-0.336594,...,-0.039535,-0.082602,-0.103825,0.305963,-0.259151,-0.173322,-0.998971,-0.176846,1.721594,-0.609672


In [23]:
dfJaccard

Unnamed: 0_level_0,key_0,key_1,key_2,key_3,key_4,key_5,key_6,key_7,key_8,key_9,...,mood_5,mood_6,mood_7,mood_8,mood_9,mood_10,mood_11,instrumentalness_cat_0,instrumentalness_cat_1,instrumentalness_cat_2
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
469,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
588,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
635,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2063,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
2535,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6002488,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
6002490,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
6002491,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
6002496,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


### Original Song & Cover Song

In [24]:
originalSongId = 180849 # The Scientist Original
originalSongIndex,originalSongVector = getIndexAndVector(originalSongId, ids, df)

In [25]:
coverSongId = 5000011 # The Scientist Original
coverSongIndex,coverSongVector = getIndexAndVector(coverSongId, ids, df)

### Get Rankings

In [26]:
ranksDFEuclidean  = applyEuclideanDistance(dfEuclidean, ids, originalSongVector)


In [29]:
ranksDFEuclidean

Unnamed: 0,id,rank
0,1705925,143.503342
1,6001475,143.693277
2,476873,143.709481
3,1713294,143.856715
4,1240823,144.174889
...,...,...
1225,6000685,172.659068
1226,6002192,172.987778
1227,6000065,179.434589
1228,6002206,180.341479


In [28]:
dfJaccard

Unnamed: 0_level_0,key_0,key_1,key_2,key_3,key_4,key_5,key_6,key_7,key_8,key_9,...,mood_5,mood_6,mood_7,mood_8,mood_9,mood_10,mood_11,instrumentalness_cat_0,instrumentalness_cat_1,instrumentalness_cat_2
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
469,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
588,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
635,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2063,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
2535,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6002488,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
6002490,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
6002491,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
6002496,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [33]:
ranksDFJaccard  = applyJaccard(dfJaccard, ids, originalSongVector[encodedFeaturesNames])

In [34]:
ranksDFJaccard

Unnamed: 0,id,rank
0,631388,0.000000
1,3653915,0.000000
2,450492,0.000000
3,180849,0.000000
4,1031439,0.333333
...,...,...
1225,6000801,1.000000
1226,22350,1.000000
1227,700572,1.000000
1228,6001083,1.000000


In [35]:
ranksDFJaccard[ranksDFJaccard['id']==coverSongId]

Unnamed: 0,id,rank
414,5000011,0.75


In [30]:
ranksDFEuclidean[ranksDFEuclidean['id']==coverSongId]

Unnamed: 0,id,rank
126,5000011,145.537537


### Cover songs

In [38]:
ranksDFEuclidean[ranksDFEuclidean['id'] > 5000000]

Unnamed: 0,id,rank
1,6001475,143.693277
6,6000372,144.227673
9,6002076,144.269600
12,6001410,144.337647
15,6001043,144.418354
...,...,...
1225,6000685,172.659068
1226,6002192,172.987778
1227,6000065,179.434589
1228,6002206,180.341479


In [40]:
ranksDFJaccard[ranksDFJaccard['id'] > 5000000]

Unnamed: 0,id,rank
6,6001770,0.333333
10,6000491,0.333333
14,6000097,0.333333
20,6000423,0.333333
22,5000020,0.333333
...,...,...
1222,6001272,0.888889
1223,6001224,0.888889
1224,6001417,1.000000
1225,6000801,1.000000
