# Using continuous / categorical / transition matrices to get recommendations

In [2]:
import pandas as pd
from sklearn.cluster import KMeans
import seaborn as sns
from matplotlib import pyplot as plt 
import math
from ipynb.fs.full.Utils import createKDeplot, loadDataFrames, dropFeaturesWithNoVariance
import numpy as np
from sklearn.preprocessing import StandardScaler
from scipy import stats
from ipynb.fs.full.Pipeline import removeConstantFeatures, applyBoxCoxAllFeatures, centerAndScale, applyEuclideanDistance, getIndexAndVector

In [3]:
# Load the combined feature table from disk (path is relative to the notebook).
DATA_CSV_PATH = './data/05-allDataContinousCategoricalTransition.csv'
df = pd.read_csv(DATA_CSV_PATH)

In [86]:
## Removing unnamed and other unnecessary columns
# NOTE: this slice is positional, so the cell is not idempotent -- re-running it
# without re-loading the CSV strips two more columns each time.
df = df.iloc[:, 2:]
# Sort by song id and renumber the rows 0..n-1 right away, so that the id Series
# saved below carries exactly the same index as the feature frame. (Previously
# `ids` kept the pre-sort row labels while `df` was renumbered afterwards.)
df = df.sort_values('id').reset_index(drop=True)
df['id'] = df['id'].astype('int64')
# Saving the song ids: copied out before the column is removed from the features.
ids = df['id'].copy()
# Keep only the feature columns; the ids now live solely in `ids`.
df = df.drop(columns='id')

In [87]:
# Preview the first row of the frame to check which columns remain.
df.head(1)

Unnamed: 0,danceability,energy,speechiness,acousticness,liveness,valence,tempo,0,1,2,...,mood_5,mood_6,mood_7,mood_8,mood_9,mood_10,mood_11,instrumentalness_cat_0,instrumentalness_cat_1,instrumentalness_cat_2
0,0.494,0.756,0.0432,0.0014,0.164,0.589,133.337,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


## Pipeline

In [88]:
# Run the preprocessing helpers imported from the Pipeline notebook, in order,
# feeding each step's result into the next one.
# NOTE(review): judging by their names the steps drop constant columns, apply a
# Box-Cox transform and then center/scale -- confirm against the Pipeline notebook.
for pipelineStep in (removeConstantFeatures, applyBoxCoxAllFeatures, centerAndScale):
    df = pipelineStep(df)

### Original Song & Cover Song

In [89]:
# Ids of the two songs being compared.
originalSongId = 180849  # The Scientist (original)
coverSongId = 1686718  # The Scientist by Boyce Avenue

# Look up each song's index and vector in the processed frame.
originalSongIndex, originalSongVector = getIndexAndVector(originalSongId, ids, df)
coverIndex, coverSongVector = getIndexAndVector(coverSongId, ids, df)

### Get Rankings

In [78]:
# Score every song against the original song's vector using the helper imported
# from the Pipeline notebook. The displayed result has an `id` and a `rank`
# column, with the original song itself at rank 0.0.
# NOTE(review): presumably `rank` is the Euclidean distance, per the helper's
# name -- confirm against the Pipeline notebook.
ranksDF  = applyEuclideanDistance(df, ids, originalSongVector)

In [79]:
# Show the first ten rows of the ranking.
ranksDF.head(10)

Unnamed: 0,id,rank
0,180849,0.0
1,1119299,22.779668
2,610562,25.368443
3,1127345,26.466739
4,748267,26.552714
5,2748744,26.888173
6,732460,26.9599
7,2065851,27.004886
8,872773,27.023669
9,466649,27.056889


In [80]:
# Locate the cover song's row in the ranking.
ranksDF.loc[ranksDF['id'] == coverSongId]

Unnamed: 0,id,rank
207,1686718,27.817478


## Rank for original Song 450

In [81]:
# Show the ranking rows whose id is above 5,000,000.
# NOTE(review): the meaning of this id threshold is not visible here -- presumably
# it marks a separately added group of songs; confirm and name the constant.
ranksDF.loc[ranksDF['id'] > 5000000]

Unnamed: 0,id,rank
10,5000019,27.096248
14,5000017,27.132508
199,5000013,27.806612
758,5000012,28.360239
1357,5000011,28.861835
1714,5000018,29.391147
2348,5000001,32.497193
3043,5000020,63.813986
3069,5000008,67.63301
3114,5000007,76.66028
