In [1]:
import os
import sys

import numpy as np
import pandas as pd

sys.path.insert(0, '..')
from final_model.FullModel import Model

### Load and filter data to selected features

In [2]:
df_path = os.path.join(os.getcwd(), os.pardir, 'data', 'data.csv')
df = pd.read_csv(df_path)

features = ['acousticness', 'danceability','energy',
            'instrumentalness', 'key', 'liveness', 'loudness',
            'mode','speechiness', 'tempo',
            'valence']
df_train = df[features]

df_train.head()

Unnamed: 0,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,valence
0,0.995,0.708,0.195,0.563,10,0.151,-12.428,1,0.0506,118.469,0.779
1,0.994,0.379,0.0135,0.901,8,0.0763,-28.454,1,0.0462,83.972,0.0767
2,0.604,0.749,0.22,0.0,5,0.119,-19.924,0,0.929,107.177,0.88
3,0.995,0.781,0.13,0.887,1,0.111,-14.734,0,0.0926,108.003,0.72
4,0.99,0.21,0.204,0.908,11,0.098,-16.829,1,0.0424,62.149,0.0693


### Select a test case and pre-process for the model

It needs be a 2 dimensional array.

In [3]:
test = np.array(df.iloc[20215][features])
test = test.reshape(1, -1)
test

array([[0.203, 0.609, 0.44299999999999995, 0.0010400000000000001, 11,
        0.183, -11.478, 0, 0.0305, 122.792, 0.21100000000000002]],
      dtype=object)

### Test the model production code

The model is presented as a single pipeline using an object. It loads the three saved components -- the scaler, encoder, and the nearest neighbors layers. This gives us an end to end model that we can test here.

Once the input is in proper shape, it just takes two lines of code to make a prediction through the model.

In [4]:
model = Model()
scores, indices = model.predict(test)

# Create a table with the top similar items predicted by the model. In this case the first item 
# the one queried on.

results = []
for i, index in enumerate(indices[0]):
    track_id = df.iloc[index]['id']
    artists = df.iloc[index]['artists']
    title = df.iloc[index]['name']
    results.append({'index': index,
                    'track_id': track_id,
                    'artists': artists,
                    'title': title,
                    'score': scores[0][i]})
    
result_table = pd.DataFrame(results)
result_table.sort_values(by='score')

Unnamed: 0,index,track_id,artists,title,score
0,20215,4nTXzIW8EjH0V1NBxyhatX,['Grateful Dead'],Lost Sailor - 2013 Remaster,2.040426e-08
1,26656,6CbTn7URluWsFxKsLNRsRV,['The Who'],Bald Headed Woman - Stereo Version,0.009654198
2,36187,6olW9Q4qX8ztoSYLr3UXUp,['The Clash'],Broadway - Remastered,0.01345226
3,167546,5wYAof2gsaOvwNJ4PWZWxP,['Ween'],It's Gonna Be (Alright),0.01401881
4,131450,6yMh8Xi6hhSNXtPe2e7tlj,['Bruce Springsteen'],Point Blank,0.01601156
5,7358,33Wx1OQWGptIxD4dEA4PEK,['Citizen'],The Night I Drove Alone,0.0174882
6,121330,2ON80h9ZjmY0WBpVz8OPjU,['Pink Floyd'],The Gold It's in the...,0.01822472
7,93872,3CSLUIrI2KPMulHR5bh17O,['Quarterflash'],Harden My Heart,0.01941204
8,125451,78iVPIyFgGjQ0g0TmDWYtQ,"['Maribou State', 'Holly Walker']",Tongue,0.01989936
9,94952,0uOPGU4CbYxzFxn6T7sblW,"['Shai', 'Bill Appleberry']",If I Ever Fall In Love,0.02033446
