In [1]:
from utils_across import get_original_model, get_modified_model
from datasets import get_german, prep_data
from evaluate import my_accuracy_score

In [2]:
import tensorflow as tf
from tensorflow import keras
class BaseModel(keras.Model):
    """Keras model with a hand-written single-batch training step.

    The step computes the compiled loss (including the layers' regularization
    losses), applies one optimizer update and refreshes the compiled metrics.
    """

    def train_step(self, data):
        """Run one optimization step on a batch.

        Args:
            data: A pair that unpacks as ``(X, y)``: model inputs and targets.

        Returns:
            dict mapping each metric's name to its current result.
        """
        X, y = data
        # Only one gradient is taken from this tape, so the default
        # (non-persistent) tape suffices and releases its resources itself.
        with tf.GradientTape() as tape:
            y_pred = self(X, training=True)
            # self.losses carries the layers' regularization penalties.
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        grads = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}

In [3]:
# Load the dataset as four pieces: train/test features and train/test labels
# (presumably the German Credit data, going by the loader's name -- TODO confirm).
Xtr, Xts, ytr, yts = get_german()
# prep_data returns four values; the third is discarded here.
# NOTE(review): the inputs are passed train-first (Xtr, Xts, ...) but the
# outputs are unpacked test-first (X_test, X_train, ...). Confirm this matches
# prep_data's actual return order. The evaluation cell scores against the raw
# `yts`, not a prepared label array.
X_test, X_train, _, y_train = prep_data(Xtr, Xts, ytr, yts)

In [4]:
# Functional-API graph: three ReLU hidden layers with L2-regularized kernels,
# followed by a 2-unit softmax output.
# `shape` is documented as a tuple excluding the batch dimension; a bare int is
# not accepted by every Keras release, so wrap the feature count explicitly.
inputs = tf.keras.layers.Input(shape=(Xtr.shape[1],))
num_hidden = 100
# Single source of truth for the L2 penalty shared by all hidden layers.
l2_strength = 0.03
layer1 = tf.keras.layers.Dense(
    num_hidden,
    activation=tf.nn.relu,
    kernel_regularizer=keras.regularizers.l2(l2_strength),
)(inputs)
layer2 = tf.keras.layers.Dense(
    num_hidden,
    activation=tf.nn.relu,
    kernel_regularizer=keras.regularizers.l2(l2_strength),
)(layer1)
layer3 = tf.keras.layers.Dense(
    num_hidden,
    activation=tf.nn.relu,
    kernel_regularizer=keras.regularizers.l2(l2_strength),
)(layer2)
outputs = tf.keras.layers.Dense(2, activation=tf.nn.softmax)(layer3)

In [5]:
# Wrap the functional graph in BaseModel so training uses its custom train_step.
model = BaseModel(inputs, outputs)
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer optimizer implementations reject.
# NOTE(review): categorical_crossentropy with a 2-unit softmax assumes y_train
# is one-hot encoded -- confirm against prep_data.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.01),
    loss=keras.losses.categorical_crossentropy,
    metrics=['accuracy'],
)

In [6]:
from tensorflow.keras.callbacks import LearningRateScheduler

def step_decay(epoch):
    """Piecewise-constant learning-rate schedule.

    Args:
        epoch: Epoch index supplied by the scheduler callback.

    Returns:
        0.01 while ``epoch`` is below 900, and 0.001 from epoch 900 onward.
    """
    return 0.01 if epoch < 900 else 0.001
# Callback that sets the optimizer's learning rate from step_decay each epoch.
lr_decay = LearningRateScheduler(step_decay)

# Train without progress output. The call is left as the cell's final
# expression so the returned History object is still displayed.
model.fit(
    X_train,
    y_train,
    epochs=1000,
    batch_size=200,
    verbose=0,
    callbacks=[lr_decay],
)

<tensorflow.python.keras.callbacks.History at 0x7f32345bd910>

In [7]:
# Score the trained model: the model is called directly on X_test and its
# output is compared with the raw test labels `yts` by the project helper.
# NOTE(review): the expected label/prediction formats are defined by
# my_accuracy_score, which is not visible here -- confirm.
my_accuracy_score(yts, model(X_test))

0.815

In [None]:
# Attribution targets read by AdversarialModel.train_step: an iterable of
# (feature_column_index, weight) pairs, where the index selects a column of the
# input gradient and the weight scales that column's L1 norm.
# TODO: the original assignment had no right-hand side (a syntax error); fill
# in the real pairs. An empty list contributes no explanation penalty.
targets = []
# Multiplier on the explanation penalty in the total loss; 0. switches it off.
alpha = 0.
class AdversarialModel(keras.Model):
    """Keras model trained on a performance loss plus an input-gradient penalty.

    The penalty is built from the gradient of the performance loss with respect
    to the inputs: for every ``(column, weight)`` pair in the module-level
    ``targets`` the L1 norm of that gradient column is scaled by ``weight`` and
    summed. The sum is multiplied by the module-level ``alpha``, divided by the
    batch size and added to the performance loss.
    """

    def train_step(self, data):
        """Run one optimization step on a batch.

        Args:
            data: A pair that unpacks as ``(X, y)``: model inputs and targets.
                ``X`` must be a tensor that a gradient tape can watch.

        Returns:
            dict mapping each metric's name to its current result.
        """
        X, y = data
        # Nested tapes: the inner tape yields d(loss)/dX, and the outer tape
        # records that computation so the penalty can be differentiated with
        # respect to the weights.
        with tf.GradientTape() as outer_tape:
            with tf.GradientTape() as inner_tape:
                inner_tape.watch(X)
                y_pred = self(X, training=True)
                performance_loss = self.compiled_loss(
                    y, y_pred, regularization_losses=self.losses
                )
            # Taken inside the outer context so it stays differentiable.
            input_grads = inner_tape.gradient(performance_loss, X)

            # Accumulate as a plain tensor. A tf.Variable updated through
            # assign_add would cut the gradient path to the weights and cannot
            # be created on every traced call.
            explanation_loss = tf.zeros([], dtype=performance_loss.dtype)
            for target in targets:
                column_norm = tf.norm(input_grads[:, target[0]], ord=1)
                explanation_loss += (
                    tf.cast(column_norm, performance_loss.dtype) * target[1]
                )

            # The static batch dimension may be unknown while tracing, so use
            # the dynamic shape.
            batch_size = tf.cast(tf.shape(X)[0], performance_loss.dtype)
            total_loss = performance_loss + (alpha * explanation_loss / batch_size)

        grads = outer_tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}