# Sandbox - _Test

In [31]:
import pandas as pd
import tensorflow as tf

from milp import codify_network
from teste import get_minimal_explanation

In [32]:
import numpy as np

In [33]:
dataset_name = '_test'
n_classes = 2

training_data = pd.read_csv(f'datasets/{dataset_name}/train.csv')
testing_data = pd.read_csv(f'datasets/{dataset_name}/test.csv')

dataframe = pd.concat([training_data, testing_data])

keras_model = tf.keras.models.load_model(f'datasets/{dataset_name}/model_4layers_{dataset_name}.h5')

data = dataframe.to_numpy()


In [34]:
mp_model, output_bounds = codify_network(keras_model, dataframe, 'fischetti', relax_constraints=False)

### Printing What the Model Predicted

_Aka_ printing the network output.

In [35]:
# for i in [134, 138]:
for i in [138]:
	print('i =', i)
	network_input = data[i, :-1]
	network_input = tf.reshape(tf.constant(network_input), [1, -1])
	network_output = keras_model.predict(tf.constant(network_input))[0]
	network_output = tf.argmax(network_output)

	predictions = keras_model.predict(tf.constant(network_input))[0, 0]

	print(f'Predictions: (ndarray[ndarray[{type(predictions)}]])', predictions)
	classification: np.int64 = network_output.numpy()
	print(f'Network output: ({type(classification)})', classification)

i = 138
Predictions: (ndarray[ndarray[<class 'numpy.float32'>]]) 0.11612741
Network output: (<class 'numpy.int64'>) 1


### Printing the Minimal Explanation

Minimal eplanations only indicates which inputs are relevant to get to a conclusion.

**Note:** The explanation happens _after_ the keras_model make its predictions.

In [95]:
i = 0
print('i =', i)
network_input = data[i, :-1]
network_input = tf.reshape(tf.constant(network_input), [1, -1])
network_output = keras_model.predict(tf.constant(network_input))[0]
network_output = tf.argmax(network_output)

mdl_aux = mp_model.clone()

minimal_explanation = get_minimal_explanation(mdl_aux, network_input, network_output, n_classes, 'fischetti', output_bounds)

minimal_explanation

i = 0


[docplex.mp.LinearConstraint[input1](x_0,EQ,0.6610539643406277)]

### Trying to Improve the Explanation

Given a minimal explanation, can we improve it?

Constraints of type $x = c$ are equivalent to $x \le c \land x \ge c$.

Therefore, we need to substitute each $x = c$ constraint by the $x \le c$ and $x \ge c$ constraints.

Then, we try stretching the interval by substituting $x \le c$ by $x \le c + \Delta x$ and see if our prediction changes. If the prediction stays the same, then we substitue and try stretching it again. If the prediction changes, then this new interval isn't valid and we don't substitute. We found the upper bound of the interval, i.e. $x \le c$.

Then we try to stretch the interval to fin the lower bound. Analogously, We try substituting $c \ge x$ by ????/


We will end up with a pair of constraints the looks like $c - k_l \cdot \Delta{x} \le x$ and $x \le c + k_u \cdot \Delta{x}$, i.e. this pair represents $c - k_l \cdot \Delta{x} \le x \le c + k_u \cdot \Delta{x}$.

### Setting Up

In [96]:
import docplex

In [123]:
epsilon = 0.01

In [124]:
minimal_model = mdl_aux
testing_model = minimal_model.clone()

#### Quick Sratch

In [125]:
linear_constraints = testing_model.find_matching_linear_constraints('input')
linear_constraints

[docplex.mp.LinearConstraint[input1](x_0,EQ,0.6610539643406277)]

In [126]:
linear_constraints = testing_model.find_matching_linear_constraints('input')

for constraint in linear_constraints:
	testing_model.remove_constraint(constraint)
	testing_model.add_constraint(constraint.lhs <= constraint.rhs.clone(), 'input LE')
	testing_model.add_constraint(constraint.lhs >= constraint.rhs.clone(), 'input GE')

In [127]:
linear_constraints = testing_model.find_matching_linear_constraints('input')
linear_constraints

[docplex.mp.LinearConstraint[input LE](x_0,LE,0.6610539643406277),
 docplex.mp.LinearConstraint[input GE](x_0,GE,0.6610539643406277)]

In [128]:
for constraint in linear_constraints:
	testing_model.solve()
	print('Initial constraint:' + '\t', constraint)

	j = 0
	while testing_model.solution is None:
		if constraint.sense == docplex.mp.constants.ComparisonType.LE:
			constraint.rhs += epsilon
		elif constraint.sense == docplex.mp.constants.ComparisonType.GE:
			constraint.rhs -= epsilon
		else:
			raise Exception('Constraint sense was neither LE nor GE')

		testing_model.solve()

		j += 1
		if j >= 250:
			print('j max value achieved')
			break
	
	# Undo last operation
	if constraint.sense == docplex.mp.constants.ComparisonType.LE:
		constraint.rhs -= epsilon
	elif constraint.sense == docplex.mp.constants.ComparisonType.GE:
		constraint.rhs += epsilon

	print('Final constraint:' + '\t', constraint)
	print()

Initial constraint:	 input LE: x_0 <= 0.6610539643406277
j max value achieved
Final constraint:	 input LE: x_0 <= 3.1510539643406044

Initial constraint:	 input GE: x_0 >= 0.6610539643406277
Final constraint:	 input GE: x_0 >= 0.5110539643406276



Rewrite pair of expression of type $x \le c$ and $x \ge c$ to $x = c$:

In [129]:
testing_model.find_matching_linear_constraints('input')

[docplex.mp.LinearConstraint[input LE](x_0,LE,3.1510539643406044),
 docplex.mp.LinearConstraint[input GE](x_0,GE,0.5110539643406276)]

## Comparing with Anchor

In [130]:
from anchor import utils

### Loading the Dataset

In [187]:
d = utils.load_csv_dataset(
	data=f'datasets/{dataset_name}/train.csv',
	target_idx=-1,
	feature_names=['A','target'],
	# categorical_features=None,
	# features_to_use=None,
	# feature_transformations=None,
	# discretize=False,
	# balance=False,
	# fill_na='-1',
	# filter_fn=None,
	skip_first=True
)

### Explainer

In [188]:
from anchor import anchor_tabular

In [189]:
explainer = anchor_tabular.AnchorTabularExplainer(
    d.class_names,
    d.feature_names,
    d.train,
    d.categorical_names)

In [190]:
x = d.test[0].reshape(1, -1)
tf.argmax(keras_model.predict(tf.constant(x))[0]).numpy().reshape(1)



array([0])

In [191]:
tf.argmax(keras_model.predict(tf.reshape(tf.constant(x), [1, -1]))[0]).numpy().reshape(1)



array([0])

In [192]:
predict_fn = lambda x: tf.argmax(keras_model.predict(x)[0]).numpy().reshape(1)

In [193]:
explainer.class_names[predict_fn(d.test[0].reshape(1, -1))[0]]



b'0'

In [213]:
keras_model.predict(d.test)



array([[0.9493827 , 0.05061728],
       [0.9489833 , 0.05101658],
       [0.94304633, 0.05695375],
       [0.94760936, 0.05239061],
       [0.26285228, 0.7371477 ],
       [0.04095571, 0.9590443 ],
       [0.11532225, 0.88467765],
       [0.89140314, 0.10859686],
       [0.29778376, 0.70221627],
       [0.7302323 , 0.26976767]], dtype=float32)

In [195]:
tf.argmax(keras_model.predict(d.test))



<tf.Tensor: shape=(2,), dtype=int64, numpy=array([0, 5])>

In [326]:
d.train[2]

array([0.39925284])

In [347]:
exp = explainer.explain_instance(
	d.train[2],
	predict_fn
)



In [348]:
exp.names()

['0.39 < A <= 0.72']

In [349]:
exp.precision()

0.8153846153846154

In [350]:
exp.coverage()

0.2587