# Imports and Setup

In [None]:
import os
# Step up to the parent directory so that the project-level imports in the
# next cell (constants, modeling.*) resolve relative to it.
# NOTE: re-running this cell climbs one more level each time.
os.chdir(os.pardir)

In [None]:
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
import shap

from constants import DATA_DIR, DROP_COLS
from modeling.load_train_test import load_train_test
from modeling.predict_and_evaluate import predict_and_evaluate

# Fix NumPy's global RNG seed so repeated runs of the notebook draw the same
# random numbers.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Load SHAP's JavaScript so interactive plots (e.g. force plots) can render
# inside the notebook.
shap.initjs()

# Create and train model
##### Note: To change models, update this cell. The SHAP explainer cell further down may also need an explainer suited to the new model type.

In [None]:
# Hyperparameters for the classifier, kept in one dict so they can be tuned
# without touching the constructor call below.
HYPERPARAMS = {'n_neighbors': 3}

# Load the train/test split, requesting upsample=True, encode=True and
# scale=False from the project loader.
# NOTE(review): k-nearest-neighbours is distance-based, so unscaled features
# with large ranges can dominate the neighbour search — confirm scale=False is
# intended for this model.
X_train, X_test, y_train, y_test = load_train_test(
    scale=False,
    upsample=True,
    encode=True,
)

# Build the classifier from the hyperparameter dict and fit it on the
# training split.
model = KNeighborsClassifier(**HYPERPARAMS)
model.fit(X_train, y_train)

# Predict out-of-sample genres and evaluate accuracy

In [None]:
# Score the fitted model on the whole test split; the features are handed over
# as a plain NumPy array and the test labels as the ground truth.
y_pred, y_prob = predict_and_evaluate(
    model,
    new_data=X_test.to_numpy(),
    truth_data=y_test,
)

In [None]:
## For artists with only one genre:
# Build the single-genre mask once (label entries of length 1) and reuse it
# for both the features and the labels.
is_single_genre = y_test.apply(len) == 1
one_genre_artists_X = X_test[is_single_genre]
one_genre_artists_y = y_test[is_single_genre]

# Store the subset results under their own names. Reusing y_pred / y_prob here
# would overwrite the full-test-set predictions, and the later comparison of
# y_pred against y_test would then pair subset predictions with the labels of
# the whole test set.
y_pred_one_genre, y_prob_one_genre = predict_and_evaluate(
    model,
    new_data=one_genre_artists_X,
    truth_data=one_genre_artists_y,
)

# Explore predictions

In [None]:
# Show the first rows of the predictions, then the first rows of the test
# labels, for a quick side-by-side look.
for preview in (y_pred, y_test):
    display(preview.head())

In [None]:
# shap.TreeExplainer only accepts tree-based models and rejects a
# KNeighborsClassifier, so use the model-agnostic KernelExplainer instead.
# NOTE(review): assumes model.predict_proba returns a single 2-D array
# (single-output classification) — confirm against how y_train is encoded.
BACKGROUND_ROWS = 100  # KernelExplainer cost grows with the background size
EXPLAIN_ROWS = 50      # ...and with the number of rows being explained

# A sampled subset of the training data serves as the background
# distribution that "missing" features are integrated over.
background = shap.sample(X_train, BACKGROUND_ROWS, random_state=42)
explainer = shap.KernelExplainer(model.predict_proba, background)

# Explain only the leading rows, so positional row i of the result still
# lines up with X_train.iloc[i].
shap_values = explainer.shap_values(X_train.iloc[:EXPLAIN_ROWS])

In [None]:
# NOTE(review): OBS indexes the first axis of both explainer.expected_value
# and shap_values, while the plotted row is always row 0 of X_train. If that
# first axis is the class axis, OBS is a class index rather than an
# observation index — confirm against the installed shap version.
OBS = 4

base_value = explainer.expected_value[OBS]
row_contributions = shap_values[OBS][0, :]
row_features = X_train.iloc[0, :]

# Force plot of the selected contributions for the first training row.
shap.force_plot(base_value, row_contributions, row_features)

In [None]:
# Bare expression: the notebook renders the training feature table as this
# cell's output, for visual inspection.
X_train

In [None]:
#shap_obj = explainer(X_train)
#shap.plots.beeswarm(shap_obj)