In [9]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import OneHotEncoder, Normalizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from ucimlrepo import fetch_ucirepo 
import lime
from lime import lime_tabular

In [2]:
# fetch dataset 
split_size = 0.3
seed = 1234567

wine = fetch_ucirepo(id=109) 
  
# data (as pandas dataframes) 
X = wine.data.features 
y = wine.data.targets 

n_classes = len(y['class'].unique())
n_feats = X.shape[1]
x_train, x_test, y_train, y_test = train_test_split(X, y, stratify = y, test_size = split_size, random_state = seed)

normalizer = Normalizer().fit(x_train)
encoder = OneHotEncoder().fit(y_train)

x_train = normalizer.transform(x_train)
x_test = normalizer.transform(x_test)
y_train = encoder.transform(y_train)
y_test  = encoder.transform(y_test)

In [3]:
n_classes, n_feats

(3, 13)

In [4]:
def evaluate_model(model, x, y):
    pred = model.predict(x)
    return accuracy_score(y, pred)

In [37]:
model = MLPClassifier((n_feats, 256, n_classes),
                      activation = 'relu',
                      learning_rate = 'adaptive', 
                      learning_rate_init=0.029, 
                      max_iter = 1000,
                      random_state=3)
model.fit(x_train, y_train)

acc_train = evaluate_model(model, x_train, y_train)
acc_test  = evaluate_model(model, x_test, y_test)

print(acc_train, acc_test)

0.5967741935483871 0.6296296296296297


In [38]:
x_test[0].shape

(13,)

In [40]:
model.predict_proba(x_test[0][None, :])

array([[0.58010298, 0.08305055, 0.30143897]])

In [47]:
explainer = lime_tabular.LimeTabularExplainer(
    training_data = x_train,
    mode = 'classification'
)

exp = explainer.explain_instance(x_test[1], model.predict_proba, num_features = n_feats)

                    Prediction probabilties do not sum to 1, and
                    thus does not constitute a probability space.
                    Check that you classifier outputs probabilities
                    (Not log probabilities, or actual class predictions).
                    


In [48]:
exp

<lime.explanation.Explanation at 0x22a8a87d890>

In [49]:
exp.local_exp

{1: [(9, -0.15952170160613696),
  (3, 0.1329458790044254),
  (8, 0.060529083672368346),
  (1, 0.05669195189524648),
  (6, 0.04364641122087194),
  (4, -0.04050503728340228),
  (10, -0.03289478469555357),
  (11, 0.01863731088836311),
  (12, -0.016017932477405235),
  (5, -0.013282552507836061),
  (2, -0.01225830559414973),
  (7, -0.006685688438323617),
  (0, -0.006682622537938497)]}

In [56]:
sum([abs(x[1]) for x in exp.local_exp[1]])

0.6002992618220212

In [None]:
# TODO
# add bayesian hyperparameter optimization for finding optimal number of components in gaussian mixture model (https://www.run.ai/guides/hyperparameter-tuning/bayesian-hyperparameter-optimization)
# do sampling and hyperparameter exstimation