# Using continuous / categorical / transition matrices to get recommendations / Selective Box-Cox

In [40]:
import pandas as pd
from sklearn.cluster import KMeans
import seaborn as sns
from matplotlib import pyplot as plt 
import math
from ipynb.fs.full.Utils import createKDeplot, loadDataFrames, dropFeaturesWithNoVariance
import numpy as np
from sklearn.preprocessing import StandardScaler
from scipy import stats
from ipynb.fs.full.Pipeline import removeConstantFeatures, applyBoxCoxAllFeatures, centerAndScale, applyEuclideanDistance, getIndexAndVector, applyBoxCoxFeatures

In [41]:
# Input table; judging by its file name it combines the continuous, categorical and transition features.
dataPath = './data/05-allDataContinousCategoricalTransition.csv'
df = pd.read_csv(dataPath)

In [42]:
## Drop the unnamed and other unneeded leading columns, then order the rows by id
df = (
    df.iloc[:, 2:]
    .sort_values('id')
    .astype({'id': 'int64'})
)
# Keep the ids aside (in sorted row order) before removing them from the feature frame.
# NOTE(review): `ids` still carries the pre-sort row labels, whereas `df` is renumbered
# 0..n-1 below; confirm the downstream helpers match the two by position, not by label.
ids = df['id']
df = df.drop(columns='id').reset_index(drop=True)

In [43]:
# Show the first row as a sanity check that only feature columns remain after the clean-up.
df.head(1)

Unnamed: 0,danceability,energy,speechiness,acousticness,liveness,valence,tempo,0,1,2,...,mood_5,mood_6,mood_7,mood_8,mood_9,mood_10,mood_11,instrumentalness_cat_0,instrumentalness_cat_1,instrumentalness_cat_2
0,0.494,0.756,0.0432,0.0014,0.164,0.589,133.337,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


## Pipeline

In [44]:
# Columns that receive the Box-Cox transform in this "selective" variant.
boxCoxFeatures = ['speechiness', 'acousticness', 'liveness', 'valence']

# Stages run in the same order as before; each helper's output feeds the next one.
dfNoConstants = removeConstantFeatures(df)
dfBoxCox = applyBoxCoxFeatures(dfNoConstants, boxCoxFeatures)
df = centerAndScale(dfBoxCox)

### Original Song & Cover Song

In [45]:
# Song ids under comparison: the original recording and one known cover.
originalSongId = 180849  # "The Scientist" (original)
coverSongId = 1686718    # "The Scientist" by Boyce Avenue

# Resolve each id to the (index, vector) pair returned by getIndexAndVector.
originalSongIndex, originalSongVector = getIndexAndVector(originalSongId, ids, df)
coverIndex, coverSongVector = getIndexAndVector(coverSongId, ids, df)

### Get Rankings

In [46]:
# Build the ranking table (columns `id` and `rank`, as displayed below) relative to the original song.
# NOTE(review): presumably `rank` is the Euclidean distance from each row of `df` to
# `originalSongVector` (the original song itself shows rank 0.0) — confirm in Pipeline.
ranksDF  = applyEuclideanDistance(df, ids, originalSongVector)

In [47]:
# Show the ten top-ranked entries; the first row is the original song itself.
ranksDF.head(10)

Unnamed: 0,id,rank
0,180849,0.0
1,466649,8.322705
2,1029726,8.369044
3,672021,8.537192
4,872773,8.665417
5,450492,8.725361
6,2065851,8.849042
7,1025154,8.902397
8,5000019,9.193334
9,1884102,9.249458


In [48]:
# Where does the known cover land in the ranking?
isCoverSong = ranksDF['id'].eq(coverSongId)
ranksDF.loc[isCoverSong]

Unnamed: 0,id,rank
88,1686718,10.345031


### Cover songs

In [49]:
# NOTE(review): ids above 5,000,000 are presumably the cover-song entries this section is about — confirm.
hasHighId = ranksDF['id'].gt(5000000)
ranksDF.loc[hasHighId]

Unnamed: 0,id,rank
8,5000019,9.193334
10,5000017,9.312466
68,5000018,10.161455
109,5000013,10.466779
421,5000001,11.433449
570,5000011,11.748643
589,5000012,11.786609
3019,5000020,57.597554
3021,5000008,58.279227
3088,5000007,70.662019
