In [None]:
%load_ext autoreload
%autoreload 2

import os
import json
import pickle
import numpy as np
import sklearn.mixture

import shap
from anomshap import attribution, exptutil

In [None]:
# Name of the dataset to examine. It is used below as the sub-directory name
# under data/features/ (input files) and under models/gmm/ (output directory).
dataname = 'thyroid'

# Number of mixture components, passed as `n_components` to the GMM constructor.
n_components = 4

## Train GMM model

In [None]:
class GMM(sklearn.mixture.GaussianMixture):
    """Gaussian mixture that also stores the log-determinant of each covariance.

    After `fit`, the extra attribute `logdet_covs_` is a float array of length
    `n_components` whose i-th entry is log|det(covariances_[i])|.
    Each `covariances_[i]` must be a square matrix for `np.linalg.slogdet`,
    which is the case for `covariance_type='full'` as used in this notebook.
    """

    def fit(self, X, y=None):
        """Fit the mixture on `X`, then cache the per-component log-determinants.

        `y` is accepted for signature compatibility and is not forwarded to
        the parent `fit`. Returns `self`.
        """
        super().fit(X)
        # slogdet returns (sign, log|det|); only the log-magnitude is kept.
        self.logdet_covs_ = np.array(
            [np.linalg.slogdet(self.covariances_[k])[1]
             for k in range(self.n_components)],
            dtype=float,
        )
        return self

In [None]:
# Load the training features as a 2-D array (ndmin=2 keeps the result
# two-dimensional even when the file holds a single row or column).
data_train = np.loadtxt(os.path.join('data', 'features', dataname, 'data_train.txt'), ndmin=2)
# Number of columns of the training array; passed to the attributer later on.
dim_x = data_train.shape[1]

# Make sure the model output directory exists. exist_ok=True replaces the
# separate isdir() check and is safe if the directory is created concurrently.
# NOTE(review): nothing visible in this notebook writes to outdir -- confirm
# whether the fitted model was meant to be saved there.
outdir = os.path.join('models', 'gmm', dataname)
os.makedirs(outdir, exist_ok=True)

# Fix random_state so the fitted mixture is identical on every
# Restart & Run All. Without it the fit is initialised from NumPy's global RNG,
# which this notebook only seeds in a later cell, so the model (and every
# attribution computed from it) would change from run to run.
model = GMM(n_components=n_components, covariance_type='full', random_state=0)
model.fit(data_train)

print('trained GMM model')

## Compute anomaly attribution

In [None]:
# Read the test features as a 2-D array.
data_test = np.loadtxt(os.path.join('data', 'features', dataname, 'data_test.txt'), ndmin=2)

# Keep only the first half of the rows, which is treated as the normal part
# of the test data.
n_normal = data_test.shape[0] // 2
data_test = data_test[:n_normal]

# Build the attributer for the fitted GMM. It receives 8 k-means centroids of
# the training data as example points, plus the per-feature training mean.
AT = attribution.Attributer(
    dim_x, model, 'gmm', 'energy',
    trdata_example=shap.kmeans(data_train, 8).data,
    trdata_mean=data_train.mean(axis=0),
)


In [None]:
# Seed NumPy's global RNG so the sample, feature and perturbation drawn below
# are the same on every run. The order of the np.random calls in this cell
# determines the values drawn, so keep it unchanged.
np.random.seed(123)

# Index of the sample / feature to be perturbed (first entry of a random permutation).
target_sample_idx = np.random.permutation(data_test.shape[0])[0]
target_feature_idx = np.random.permutation(data_test.shape[1])[0]

# Perturb one feature of a copy of the chosen sample: random sign, magnitude
# drawn uniformly from [perturb_bound[0], perturb_bound[1]).
perturb_bound = [1.0, 2.0]
target_sample = np.copy(data_test[target_sample_idx])
sign = (-1, 1)[np.random.choice(2)]
perturbation = sign*(np.random.rand()*(perturb_bound[1]-perturb_bound[0])+perturb_bound[0])
target_sample[target_feature_idx] += perturbation

# Compute attribution with each of the listed methods.
attr_kwargs = {
    'anomshap_bl_regparam': 1e-2,
    'anomshap_bl_learnrate': 0.1,
    'methods': ['margscore', 'kernshap', 'anomshap'],
    'kernshap_num_sample': 'auto',
    'anomshap_num_sample': 'auto',
}
# NOTE(review): the meaning of the positional 1e50 is not visible from this
# notebook -- confirm against Attributer.attribute.
attr, info = AT.attribute(target_sample, 1e50, **attr_kwargs)

# Plot the anomaly-score attribution of each feature for each method; the
# orange bar marks the anomalous (perturbed) feature.
#   margscore = energy of marginal of GMMs
#   kernshap  = attribution by kernel SHAP
#   anomshap  = attribution by the proposed method
# The trailing ';' keeps the cell from echoing a return value as text output.
exptutil.plot_attr(attr, anofeats=[target_feature_idx,]);
