# Using continuous / categorical / transition matrices to get recommendations

In [1]:
import pandas as pd
from sklearn.cluster import KMeans
import seaborn as sns
from matplotlib import pyplot as plt 
import math
from ipynb.fs.full.Utils import createKDeplot, loadDataFrames, dropFeaturesWithNoVariance
import numpy as np
from sklearn.preprocessing import StandardScaler
from scipy import stats
from ipynb.fs.full.Pipeline import removeConstantFeatures, applyBoxCoxAllFeatures, centerAndScale, applyEuclideanDistance, getIndexAndVector

In [2]:
# Load the combined feature table (continuous + categorical + transition
# columns, going by the file name) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='./data/05-allDataContinousCategoricalTransition.csv')

In [3]:
## Drop the first two columns (unnamed / unnecessary ones) and order rows by song id
df = df.iloc[:, 2:].sort_values('id')
df['id'] = df['id'].astype('int64')
# Keep the song ids in a separate Series before 'id' leaves the feature frame.
# NOTE(review): `ids` keeps the pre-reset index labels while `df` below gets a
# fresh 0..n-1 index — confirm that getIndexAndVector expects this.
ids = df['id']
# Moving 'id' into the index and then discarding that index removes the column
# and renumbers the rows in one chain.
df = df.set_index('id').reset_index(drop=True)

In [4]:
# Preview the first row to check which feature columns remain after the cleanup.
df.head(n=1)

Unnamed: 0,danceability,energy,speechiness,acousticness,liveness,valence,tempo,0,1,2,...,mood_5,mood_6,mood_7,mood_8,mood_9,mood_10,mood_11,instrumentalness_cat_0,instrumentalness_cat_1,instrumentalness_cat_2
0,0.494,0.756,0.0432,0.0014,0.164,0.589,133.337,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


## Pipeline

In [5]:
# Run the feature frame through the shared Pipeline helpers, innermost first:
# removeConstantFeatures, then centerAndScale.
# NOTE(review): helper behaviour is defined in the Pipeline notebook; going by
# their names they drop constant columns and standardise — confirm there.
df = centerAndScale(removeConstantFeatures(df))

### Original Song & Cover Song

In [6]:
# Song ids used for the original-vs-cover comparison.
originalSongId = 180849  # The Scientist (original)
coverSongId = 1686718  # The Scientist by Boyce Avenue

# getIndexAndVector returns a pair for each id, unpacked here as (index, vector).
originalSongIndex, originalSongVector = getIndexAndVector(originalSongId, ids, df)
coverIndex, coverSongVector = getIndexAndVector(coverSongId, ids, df)

In [7]:
# Rank every song against the original song's vector.
# NOTE(review): the helper also prints a long list of [number, value] pairs
# while it runs (see the cell output) — presumably index/distance debug output.
ranksDF = applyEuclideanDistance(df, ids, originalSongVector)

[[23, 41.47047869450378], [105, 20.011576191841012], [118, 28.079573928488106], [120, 58.43500513237254], [292, 52.36827446942614], [407, 13.858339046473182], [418, 19.912064272419073], [456, 11.91499972571235], [467, 54.90341141731892], [469, 20.616160770471172], [635, 13.527440594470534], [652, 25.4670091098099], [816, 14.668384727999157], [830, 31.911175372474087], [836, 22.155819986820237], [1334, 14.87266675018338], [1345, 13.216609474384354], [1395, 11.037074463382849], [1433, 14.239606665044134], [2147, 20.87828133568984], [2148, 23.292294337222685], [2220, 15.547739960615479], [2535, 12.187401852700237], [3118, 20.5696499192764], [3165, 10.59078044779707], [3337, 12.802447752881632], [3991, 23.196129951053756], [4019, 11.384210009702109], [4041, 13.081981381512065], [4084, 17.44127906961726], [4169, 52.61071527508497], [5643, 13.07695688084801], [5909, 10.886595227945095], [6090, 18.21109382255794], [6097, 15.404484778012371], [6109, 19.09878720154867], [6122, 16.6450885077239]

In [8]:
# Show the first ten rows of the ranking (id and rank columns).
ranksDF.head(n=10)

Unnamed: 0,id,rank
0,180849,0.0
1,1029726,8.320151
2,672021,8.607433
3,450492,8.619808
4,872773,8.620368
5,2065851,8.731517
6,1025154,8.870613
7,1884102,8.963208
8,5000019,9.105609
9,757984,9.155899


In [9]:
# Locate the cover song's row in the ranking to see where it landed.
ranksDF.loc[ranksDF['id'] == coverSongId]

Unnamed: 0,id,rank
92,1686718,10.288917


## Rank for original Song 450

In [None]:
# Keep only rows whose id is above 5,000,000.
# NOTE(review): the meaning of this threshold is not visible here — presumably
# these ids belong to a separately numbered set of songs; confirm.
ranksDF.loc[ranksDF['id'] > 5000000]