In [1]:
import collections
import time

import numpy as np
import pandas as pd

# import tensorflow.compat.v1 as tf
# tf.disable_eager_execution()
from tensorflow.python.training import adagrad
import tensorflow.compat.v1.keras.backend
import shap

from ciml import gather_results
from ciml import tf_trainer

In [17]:
# Dataset, experiment and model for the CIML Data
data_path = '/Users/andreafrittoli/ciml/cimlodsceu2019seed'
dataset = 'usr_1m-1min-status'
experiment = 'dnn-5x100-500epochs-bs128'
model_dir = gather_results.get_model_folder(dataset, experiment)

In [18]:
# Load the experiment data
experiment_data = gather_results.load_experiment(
        experiment, data_path=data_path)

In [29]:
# Load Data
training_data = gather_results.load_dataset(dataset, 'training', data_path=data_path)
test_data = gather_results.load_dataset(dataset, 'test', data_path=data_path)
labels = gather_results.load_dataset(dataset, 'labels', data_path=data_path)['labels']

X_train = training_data['examples']
Y_train = training_data['classes']
X_train_red = shap.sample(X_train, 100)
X_test = test_data['examples']
Y_test = test_data['classes']
X_test_red = shap.sample(X_test, 100)

# Helper to display accuracy from the dataset
def print_accuracy(f):
    print("Accuracy = {0}%".format(100*np.sum(f(X_test) == Y_test)/len(Y_test)))
    time.sleep(0.5) # to let the print get out before any progress bars

shap.initjs()

In [None]:
# Create the estimator
estimator = experiment_data['estimator']
hyper_params = experiment_data['hyper_params']
params = experiment_data['params']
steps = int(hyper_params['steps'])
num_epochs = int(hyper_params['epochs'])
batch_size = int(hyper_params['batch_size'])
optimizer = hyper_params['optimizer']
learning_rate = float(hyper_params['learning_rate'])
label_vocabulary = None

estimator = tf_trainer.get_estimator(
        estimator, hyper_params, params, labels, model_dir,
        optimizer=adagrad.AdagradOptimizer(learning_rate=0.05),
        label_vocabulary=label_vocabulary, gpu=False)

In [24]:
# Prepare the input function
input_fn=tf_trainer.get_input_fn(shuffle=True,
                    batch_size=batch_size, num_epochs=num_epochs,
                    labels=labels, **training_data)





In [25]:
# Run the training
training_result = tf_trainer.get_training_method(estimator)(
                    input_fn=tf_trainer.get_input_fn(shuffle=True,
                    batch_size=batch_size, num_epochs=num_epochs,
                    labels=labels, **training_data), steps=steps)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local

In [26]:
# Build a prediction function suitable for the KernelExplainer
def model_fn(data):
    output = estimator.predict(tf_trainer.get_input_fn(
        examples=data, example_ids=None, classes=None, labels=labels,
        batch_size=128, num_epochs=1))
    return np.array([int(x['classes']) for x in output])

In [30]:
# Verify the function fits our data
result = model_fn(X_train_red)
result

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /System/Volumes/Data/git/github.com/mtreinish/ciml/ciml/../data/usr_1m-1min-status/dnn-5x100-500epochs-bs128/model.ckpt-9286
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [31]:
# Run the explainer
print_accuracy(model_fn)

explainer = shap.KernelExplainer(model_fn, X_train_red)
shap_values = explainer.shap_values(X_test_red)
shap.force_plot(explainer.expected_value[0], shap_values[0], X_test_red)

_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /System/Volumes/Data/git/github.com/mtreinish/ciml/ciml/../data/usr_1m-1min-status/dnn-5x100-500epochs-bs128/model.ckpt-9286
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
 91%|█████████ | 91/100 [1:35:31<09:41, 64.66s/it]INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /System/Volumes/Data/git/github.com/mtreinish/ciml/ciml/../data/usr_1m-1min-status/dnn-5x100-500epochs-bs128/model.ckpt-9286
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_o

TypeError: 'float' object is not subscriptable

In [43]:
shap.force_plot(explainer.expected_value, shap_values, X_test_red)

In [46]:
shap_values.shape

(100, 184)

In [47]:
X_test_red.shape

(100, 184)

In [51]:
np.savez_compressed("shap_values.npz", shap_values=shap_values, X_test_red=X_test_red, expected_value=explainer.expected_value, labels=labels)

In [None]:
shap.summary_plot(shap_values, X_test_red, feature_names=labels, plot_type="bar", max_display=184, sort=False)