# Using continuous / categorical / transition matrices to get recommendations / Selective Box-Cox

In [38]:
import pandas as pd
from sklearn.cluster import KMeans
import seaborn as sns
from matplotlib import pyplot as plt 
import math
from ipynb.fs.full.Utils import createKDeplot, loadDataFrames, dropFeaturesWithNoVariance, loadCoverGroups
import numpy as np
from sklearn.preprocessing import StandardScaler
from scipy import stats
from ipynb.fs.full.Pipeline import removeConstantFeatures, applyBoxCoxAllFeatures, centerAndScale, applyEuclideanDistance, getIndexAndVector, applyBoxCoxFeatures

In [39]:
# Load the unified dataset (path is relative to the notebook's working directory).
df = pd.read_csv(
    './data/unified/05-allDataContinousCategoricalTransition.csv'
)

In [40]:
# Drop the first two columns (unnamed / unnecessary), then order the rows by song id.
df = df.iloc[:, 2:].sort_values('id')
df['id'] = df['id'].astype('int64')

# Keep the ids aside before removing them from the feature matrix.
# NOTE(review): `ids` retains the index labels left by sort_values, whereas `df`
# is renumbered 0..n-1 below -- confirm getIndexAndVector does not rely on the
# two objects sharing index labels.
ids = df['id']

# Remove the id column from the features and renumber the rows 0..n-1.
df = df.drop(columns='id').reset_index(drop=True)

## Pipeline

In [41]:
# Columns handed to applyBoxCoxFeatures (the "selective" Box-Cox step).
boxCoxColumns = ['speechiness', 'acousticness', 'liveness', 'valence']

# Pipeline stages are applied in order; each one rebinds `df` to its result.
df = removeConstantFeatures(df)
df = applyBoxCoxFeatures(df, boxCoxColumns)
df = centerAndScale(df)

### Original Song & Cover Song

In [42]:
# Reference pair used throughout the rest of the notebook.
originalSongId = 180849  # The Scientist (original)
coverSongId = 1686718  # The Scientist by Boyce Avenue

# Look up each song's index and feature vector in the processed data.
originalSongIndex, originalSongVector = getIndexAndVector(originalSongId, ids, df)
coverIndex, coverSongVector = getIndexAndVector(coverSongId, ids, df)

In [43]:
# Display the index that getIndexAndVector returned for the original song.
originalSongIndex

688

### Get Rankings

In [44]:
# Rank every song against the original song's vector.
# NOTE(review): presumably `rank` is a Euclidean distance where smaller means
# more similar -- confirm against applyEuclideanDistance in the Pipeline notebook.
ranksDF  = applyEuclideanDistance(df, ids, originalSongVector)

In [45]:
# Show the first ten rows of the ranking.
ranksDF.head(10)

Unnamed: 0,id,rank
0,180849,0.0
1,6001027,7.55821
2,672021,8.097575
3,1029726,8.322826
4,466649,8.33265
5,2065851,8.403237
6,1025154,8.530632
7,1884102,8.597437
8,872773,8.774542
9,3504560,8.842531


In [46]:
# Locate the row of the known cover song in the ranking.
ranksDF.loc[ranksDF['id'] == coverSongId]

Unnamed: 0,id,rank
75,1686718,10.171142


### Cover songs

In [47]:
# Keep only rows whose id is above 5,000,000.
# NOTE(review): presumably ids above this threshold mark the cover-song dataset;
# note that coverSongId (1686718) falls below it and is excluded here -- confirm
# the id convention.
ranksDF.loc[ranksDF['id'] > 5_000_000]

Unnamed: 0,id,rank
1,6001027,7.558210
11,5000019,8.985209
12,6001258,9.175746
17,5000001,9.315289
19,5000017,9.342338
...,...,...
5972,6002253,172.556603
5973,6000743,173.151095
5974,6002278,177.391709
5975,6001117,188.703785
