# Study – Glass – Stretcher: one range before the others

In [1]:
import pandas as pd
import tensorflow as tf

from milp import codify_network
from teste import get_minimal_explanation

2023-03-28 17:02:10.007933: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-28 17:02:10.191711: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-03-28 17:02:10.191730: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-03-28 17:02:10.221350: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-03-28 17:02:11.189066: W tensorflow/stream_executor/platform/de

In [2]:
# For type annotations
import numpy as np

In [3]:
dataset_name = 'glass'

training_data = pd.read_csv(f'datasets/{dataset_name}/train.csv')
testing_data = pd.read_csv(f'datasets/{dataset_name}/test.csv')

dataframe = pd.concat([training_data, testing_data])

keras_model = tf.keras.models.load_model(f'datasets/{dataset_name}/model_2layers_{dataset_name}.h5')

data = dataframe.to_numpy()
n_classes = dataframe['target'].nunique()

2023-03-28 17:02:12.325295: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-03-28 17:02:12.325412: W tensorflow/stream_executor/cuda/cuda_driver.cc:263] failed call to cuInit: UNKNOWN ERROR (303)
2023-03-28 17:02:12.325502: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (bolsista-HP-EliteDesk-800-G1-SFF): /proc/driver/nvidia/version does not exist
2023-03-28 17:02:12.326541: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
mp_model, output_bounds = codify_network(keras_model, dataframe, 'fischetti', relax_constraints=False)

### Printing What the Model Predicted

_Aka_ printing the network output.

In [5]:
# i = 134 is also a nice value to study
i = 138
print('i =', i)
network_input = data[i, :-1]
network_input = tf.reshape(tf.constant(network_input), [1, -1])
network_output = keras_model.predict(tf.constant(network_input))[0]
network_output = tf.argmax(network_output)

predictions = keras_model.predict(tf.constant(network_input))[0, 0]

print(f'Predictions: (ndarray[ndarray[{type(predictions)}]])', predictions)
classification: np.int64 = network_output.numpy()
print(f'Network output: ({type(classification)})', classification)

i = 138
Predictions: (ndarray[ndarray[<class 'numpy.float32'>]]) 0.0007575714
Network output: (<class 'numpy.int64'>) 1


### Printing the Minimal Explanation

Minimal eplanations only indicates which inputs are relevant to get to a conclusion.

**Note:** The explanation happens _after_ the keras_model make its predictions.

In [6]:
mdl_aux = mp_model.clone()

minimal_explanation = get_minimal_explanation(mdl_aux, network_input, network_output, n_classes, 'fischetti', output_bounds)
minimal_explanation

[docplex.mp.LinearConstraint[input1](x_0,EQ,2.967691214515491),
 docplex.mp.LinearConstraint[input4](x_3,EQ,-1.408120229258977),
 docplex.mp.LinearConstraint[input6](x_5,EQ,-0.790702170757714),
 docplex.mp.LinearConstraint[input7](x_6,EQ,4.24127975754059),
 docplex.mp.LinearConstraint[input8](x_7,EQ,-0.3615292659832898),
 docplex.mp.LinearConstraint[input9](x_8,EQ,-0.6037614142464092)]

> The keras_model predicted $C_1$ because:
>
> $x_0 = 2.967691214515491$,
>
> $x_3 = -1.408120229258977$,
>
> $x_5 = -0.790702170757714$, 
>
> $x_6 = 4.24127975754059$, 
>
> $x_7 = -0.3615292659832898$ and 
>
> $x_8 = -0.6037614142464092$.

### Trying to Improve the Explanation

Given a minimal explanation, can we improve it?

Constraints of type $x = c$ are equivalent to $x \le c \land x \ge c$.

Therefore, we need to substitute each $x = c$ constraint by the $x \le c$ and $x \ge c$ constraints.

Then, we try stretching the interval by substituting $x \le c$ by $x \le c + \Delta x$ and see if our prediction changes. If the prediction stays the same, then we substitue and try stretching it again. If the prediction changes, then this new interval isn't valid and we don't substitute. We found the upper bound of the interval, i.e. $x \le c$.

Then we try to stretch the interval to fin the lower bound. Analogously, We try substituting $c \ge x$ by ????/


We will end up with a pair of constraints the looks like $c - k_l \cdot \Delta{x} \le x$ and $x \le c + k_u \cdot \Delta{x}$, i.e. this pair represents $c - k_l \cdot \Delta{x} \le x \le c + k_u \cdot \Delta{x}$.

### Setting Up

In [7]:
import docplex

In [8]:
minimal_model = mdl_aux
testing_model = minimal_model.clone()

#### Quick Sratch

In [9]:
linear_constraints = testing_model.find_matching_linear_constraints('input')
linear_constraints

[docplex.mp.LinearConstraint[input1](x_0,EQ,2.967691214515491),
 docplex.mp.LinearConstraint[input4](x_3,EQ,-1.408120229258977),
 docplex.mp.LinearConstraint[input6](x_5,EQ,-0.790702170757714),
 docplex.mp.LinearConstraint[input7](x_6,EQ,4.24127975754059),
 docplex.mp.LinearConstraint[input8](x_7,EQ,-0.3615292659832898),
 docplex.mp.LinearConstraint[input9](x_8,EQ,-0.6037614142464092)]

In [10]:
linear_constraints = testing_model.find_matching_linear_constraints('input')

for constraint in linear_constraints:
	testing_model.remove_constraint(constraint)
	testing_model.add_constraint(constraint.lhs <= constraint.rhs.clone(), 'input LE')
	testing_model.add_constraint(constraint.lhs >= constraint.rhs.clone(), 'input GE')

In [11]:
linear_constraints = testing_model.find_matching_linear_constraints('input')
linear_constraints

[docplex.mp.LinearConstraint[input LE](x_0,LE,2.967691214515491),
 docplex.mp.LinearConstraint[input GE](x_0,GE,2.967691214515491),
 docplex.mp.LinearConstraint[input LE](x_3,LE,-1.408120229258977),
 docplex.mp.LinearConstraint[input GE](x_3,GE,-1.408120229258977),
 docplex.mp.LinearConstraint[input LE](x_5,LE,-0.790702170757714),
 docplex.mp.LinearConstraint[input GE](x_5,GE,-0.790702170757714),
 docplex.mp.LinearConstraint[input LE](x_6,LE,4.24127975754059),
 docplex.mp.LinearConstraint[input GE](x_6,GE,4.24127975754059),
 docplex.mp.LinearConstraint[input LE](x_7,LE,-0.3615292659832898),
 docplex.mp.LinearConstraint[input GE](x_7,GE,-0.3615292659832898),
 docplex.mp.LinearConstraint[input LE](x_8,LE,-0.6037614142464092),
 docplex.mp.LinearConstraint[input GE](x_8,GE,-0.6037614142464092)]

##### The Algorithm

In [12]:
epsilon = 0.01

variables = testing_model.find_matching_vars('x')

for constraint in linear_constraints:
	testing_model.solve()
	print('Initial constraint:' + '\t', constraint)

	variable = constraint.lhs
	while testing_model.solution is None:
		if constraint.sense == docplex.mp.constants.ComparisonType.LE:
			if constraint.rhs.constant <= variable.ub:
				constraint.rhs += epsilon
			else:
				break
		elif constraint.sense == docplex.mp.constants.ComparisonType.GE:
			if constraint.rhs.constant >= variable.lb:
				constraint.rhs -= epsilon
			else:
				break
		else:
			raise Exception('Constraint sense was neither LE nor GE')

		testing_model.solve()

	# Undo last operation
	if constraint.sense == docplex.mp.constants.ComparisonType.LE:
		constraint.rhs -= epsilon
	elif constraint.sense == docplex.mp.constants.ComparisonType.GE:
		constraint.rhs += epsilon

	print('Final constraint:' + '\t', constraint)
	print()

Initial constraint:	 input LE: x_0 <= 2.967691214515491
Final constraint:	 input LE: x_0 <= 5.127691214515445

Initial constraint:	 input GE: x_0 >= 2.967691214515491
Final constraint:	 input GE: x_0 >= 1.427691214515511

Initial constraint:	 input LE: x_3 <= -1.408120229258977
Final constraint:	 input LE: x_3 <= -1.408120229258977

Initial constraint:	 input GE: x_3 >= -1.408120229258977
Final constraint:	 input GE: x_3 >= -1.518120229258977

Initial constraint:	 input LE: x_5 <= -0.790702170757714
Final constraint:	 input LE: x_5 <= -0.790702170757714

Initial constraint:	 input GE: x_5 >= -0.790702170757714
Final constraint:	 input GE: x_5 >= -0.790702170757714

Initial constraint:	 input LE: x_6 <= 4.24127975754059
Final constraint:	 input LE: x_6 <= 4.24127975754059

Initial constraint:	 input GE: x_6 >= 4.24127975754059
Final constraint:	 input GE: x_6 >= 4.24127975754059

Initial constraint:	 input LE: x_7 <= -0.3615292659832898
Final constraint:	 input LE: x_7 <= -0.36152926598

**TODO:** Rewrite pair of expression of type $x \le c$ and $x \ge c$ to $x = c$:

In [13]:
linear_constraints = testing_model.find_matching_linear_constraints('input')
linear_constraints

[docplex.mp.LinearConstraint[input LE](x_0,LE,5.127691214515445),
 docplex.mp.LinearConstraint[input GE](x_0,GE,1.427691214515511),
 docplex.mp.LinearConstraint[input LE](x_3,LE,-1.408120229258977),
 docplex.mp.LinearConstraint[input GE](x_3,GE,-1.518120229258977),
 docplex.mp.LinearConstraint[input LE](x_5,LE,-0.790702170757714),
 docplex.mp.LinearConstraint[input GE](x_5,GE,-0.790702170757714),
 docplex.mp.LinearConstraint[input LE](x_6,LE,4.24127975754059),
 docplex.mp.LinearConstraint[input GE](x_6,GE,4.24127975754059),
 docplex.mp.LinearConstraint[input LE](x_7,LE,-0.3615292659832898),
 docplex.mp.LinearConstraint[input GE](x_7,GE,-0.3615292659832898),
 docplex.mp.LinearConstraint[input LE](x_8,LE,-0.6037614142464092),
 docplex.mp.LinearConstraint[input GE](x_8,GE,-0.6037614142464092)]

Note that `x_6` is actually equal to `4.24127975754059`

In [14]:
number_of_inputs = len(dataframe.columns.drop('target'))
for i in range(number_of_inputs):
	constraints_of_x_i = filter(lambda x: x.lhs.name == f'x_{i}', linear_constraints)
	constraints = [c for c in constraints_of_x_i]

	if len(constraints) == 2:
		if constraints[0].rhs.constant == constraints[1].rhs.constant:
			testing_model.remove_constraints(constraints)
			testing_model.add_constraint(constraints[0].lhs == constraints[0].rhs, 'input')

In [15]:
improved_explanation = testing_model.find_matching_linear_constraints('input')
improved_explanation

[docplex.mp.LinearConstraint[input LE](x_0,LE,5.127691214515445),
 docplex.mp.LinearConstraint[input GE](x_0,GE,1.427691214515511),
 docplex.mp.LinearConstraint[input LE](x_3,LE,-1.408120229258977),
 docplex.mp.LinearConstraint[input GE](x_3,GE,-1.518120229258977),
 docplex.mp.LinearConstraint[input](x_5,EQ,-0.790702170757714),
 docplex.mp.LinearConstraint[input](x_6,EQ,4.24127975754059),
 docplex.mp.LinearConstraint[input](x_7,EQ,-0.3615292659832898),
 docplex.mp.LinearConstraint[input](x_8,EQ,-0.6037614142464092)]

### Pretty Printing the Explanation

In [16]:
def get_variable_index(variable: docplex.mp.dvar.Var) -> int:
	index = variable.name.split('_')[1]
	return int(index)

In [17]:
def print_explanation(explanation: list[docplex.mp.constr.LinearConstraint]):
	for e in explanation:
		variable = e.lhs
		index = get_variable_index(variable)
		feature_name = dataframe.columns[index]
		print(feature_name, e.sense.operator_symbol, e.rhs)

In [18]:
print_explanation(improved_explanation)

RI <= 5.127691214515445
RI >= 1.427691214515511
Al <= -1.408120229258977
Al >= -1.518120229258977
K == -0.790702170757714
Ca == 4.24127975754059
Ba == -0.3615292659832898
Fe == -0.6037614142464092


## Comparing with Anchor

In [19]:
from anchor import utils

### Loading the Dataset

In [20]:
d = utils.load_csv_dataset(
	data=f'datasets/{dataset_name}/test.csv',
	target_idx=-1,
	feature_names=['RI','Na','Mg','Al','Si','K','Ca','Ba','Fe','target'],
	# categorical_features=None,
	# features_to_use=None,
	# feature_transformations=None,
	# discretize=False,
	# balance=False,
	# fill_na='-1',
	# filter_fn=None,
	skip_first=True
)

### Explainer

In [21]:
from anchor import anchor_tabular

In [40]:
explainer = anchor_tabular.AnchorTabularExplainer(
    d.class_names,
    d.feature_names,
    d.train,
    d.categorical_names)

In [41]:
predict_fn = lambda x: tf.argmax(keras_model.predict(x)[0]).numpy().reshape(1)

In [42]:
for a in d.train:
	a == data[i]

  a == data[i]


In [43]:
exp = explainer.explain_instance(data[i, :-1], predict_fn)



In [44]:
exp.names()

['-0.01 < Si <= 0.48',
 '-0.41 < RI <= 0.27',
 '-0.36 < Ca <= 0.02',
 '0.17 < Mg <= 0.60',
 '-0.57 < Al <= -0.16',
 '0.04 < K <= 0.16',
 'Na <= -0.15',
 'Ba = -0.143897526850263']

In [45]:
print_explanation(improved_explanation)

RI <= 5.127691214515445
RI >= 1.427691214515511
Al <= -1.408120229258977
Al >= -1.518120229258977
K == -0.790702170757714
Ca == 4.24127975754059
Ba == -0.3615292659832898
Fe == -0.6037614142464092


In [46]:
exp.precision()

1.0

In [47]:
exp.coverage()

0.0

In [48]:
# explainer.explain_instance(
# 	data_row,
# 	classifier_fn,
# 	threshold=0.95,
# 	delta=0.1,
# 	tau=0.15,
# 	batch_size=100,
# 	max_anchor_size=None,
# 	desired_label=None,
# 	**kwargs)