In [1]:
import tensorflow.compat.v1 as tf
from tensorflow import keras
import numpy as np
import random
import os
import gc
import time
import platform

In [2]:
# Switch TensorFlow to TF1-style behavior before any graph, session or model is
# created; the code below relies on tf.Session and tf.gradients.
tf.disable_v2_behavior()

from tensorflow.compat.v1.keras import backend as K
# Create one explicit session and register it as the session Keras uses.
sess = tf.Session()
K.set_session(sess)

Instructions for updating:
non-resource variables are not supported in the long term


# Data loading

In [5]:
# Prefix prepended to every model / sketch / output path used in this notebook.
# NOTE: paths are built as BASE_DIR + '/tmp/...', so an empty BASE_DIR resolves
# to absolute '/tmp/...' paths.
BASE_DIR = ""
# Number of label classes.
NUM_CLASSES=10
# Side length, in pixels, of the square input images.
WIDTH = 28
# Number of image channels.
NUM_CHANNELS = 1

def _normalize(X):
  assert X.dtype == np.uint8
  X = X.astype(np.float64)
  X /= 255
  return X

def get_one_hot(targets, nb_classes):
  """One-hot encodes integer class labels.

  Args:
    targets: integer class indices, given as a numpy array or any array-like
      (Python list, tuple or scalar) of arbitrary shape.
    nb_classes: number of classes, i.e. the length of each one-hot vector.

  Returns:
    A float64 numpy array of shape targets.shape + (nb_classes,) in which
    entry [..., k] is 1.0 where the corresponding label equals k and 0.0
    elsewhere.

  Raises:
    IndexError: if a label is not a valid integer index into nb_classes rows.
  """
  # Convert once up front so inputs without a `.shape` attribute (lists,
  # tuples, scalars) are handled the same way as numpy arrays.
  targets = np.asarray(targets)
  one_hot_rows = np.eye(nb_classes)[targets.reshape(-1)]
  return one_hot_rows.reshape(targets.shape + (nb_classes,))
  
def load_standard_mnist():
  """Loads MNIST as normalized images with an explicit channel axis.

  Returns:
    A tuple (X_train, Y_train, X_validation, Y_validation). The X arrays are
    float64 with values in [0, 1] and shape
    (num_examples, WIDTH, WIDTH, NUM_CHANNELS); the Y arrays hold int32 class
    labels. The "validation" arrays are the second split returned by the
    Keras MNIST loader.
  """
  (X_train, Y_train), (X_validation, Y_validation) = tf.keras.datasets.mnist.load_data(path='mnist.npz')

  # Add the channel axis using the shared config constants instead of a bare
  # literal, so the image layout is defined in exactly one place.
  image_shape = (WIDTH, WIDTH, NUM_CHANNELS)
  X_train = _normalize(X_train.reshape((X_train.shape[0],) + image_shape))
  X_validation = _normalize(
      X_validation.reshape((X_validation.shape[0],) + image_shape))

  return (X_train, Y_train.astype(np.int32),
          X_validation, Y_validation.astype(np.int32))

def load_mnist_train_dev():
  """Returns the MNIST training split plus a fixed 100-row dev subset.

  The dev subset is taken from the validation split returned by
  load_standard_mnist() using a frozen list of row indices.

  Returns:
    A tuple (X_train, Y_train, X_dev, Y_dev).
  """
  # These indices were randomly picked once, then frozen for reproducibility.
  # NOTE: index 7327 is listed twice, so two dev rows are the same example.
  dev_indices = [
      8106, 9910, 3397, 8870, 2103, 5689, 9799, 4037, 1584, 1160,
      9063, 1332, 3043, 8307, 1042, 3466, 7772, 7327, 7098, 7216,
      8624, 6400, 5811, 1862, 7327, 1626, 5958, 3868, 3795, 836,
      3406, 5570, 9535, 9653, 7890, 5671, 2451, 9175, 8310, 2425,
      5923, 2797, 1150, 6012, 8666, 8849, 6839, 5994, 6751, 9139,
      9648, 8898, 9869, 2184, 1363, 8294, 4000, 5424, 4544, 330,
      4325, 4597, 4735, 9966, 2342, 7220, 5774, 3437, 4276, 760,
      7868, 2993, 6262, 8880, 6017, 5045, 9513, 4084, 7115, 5775,
      358, 3549, 2612, 8973, 6747, 415, 8573, 9973, 2734, 586,
      3937, 6889, 1191, 5255, 1460, 413, 7257, 5272, 7402, 7968,
  ]
  train_images, train_labels, val_images, val_labels = load_standard_mnist()
  return (train_images, train_labels,
          val_images[dev_indices], val_labels[dev_indices])

In [None]:
# Load the training split and the fixed dev subset, then one-hot encode the
# integer labels. The labels are reloaded from scratch on every run of this
# cell, so re-running it is safe.
X_train, Y_train, X_dev, Y_dev = load_mnist_train_dev()
# Use the shared class-count constant rather than a repeated literal.
Y_train = get_one_hot(Y_train, NUM_CLASSES)
Y_dev = get_one_hot(Y_dev, NUM_CLASSES)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
 2138112/11490434 [====>.........................] - ETA: 0s

KeyboardInterrupt: ignored

# Model loading

In [None]:
# Trial identifier; it is substituted into the model, sketch and output paths
# used by the cells below.
TRIAL = 1

In [None]:
# Load the saved Keras base model for the selected trial and print its layers.
model = keras.models.load_model(BASE_DIR + '/tmp/mnist/original/{0}/model_ckpt_75.h5'.format(TRIAL))
model.summary()

# Influence functions implementation

In [None]:
from tensorflow.compat.v1.keras import layers


def flatten(tensor_list):
  """Concatenates tensors into one rank-1 tensor.

  Args:
    tensor_list: an iterable of tensors of any shape.

  Returns:
    A rank-1 tensor made of every input reshaped to a vector and joined in
    the order given.
  """
  flat_parts = []
  for tensor in tensor_list:
    flat_parts.append(tf.reshape(tensor, [-1]))
  return tf.concat(flat_parts, 0)


class InfSketchLayer(layers.Layer):
  """A Keras layer encapsulating the influence computation.

  The layer holds two matrices of identical shape: `target`, which is never
  trained, and `sketch`, which is the layer's only trainable weight and starts
  at zero. In training mode call() produces a scalar loss over `sketch`; in
  inference mode it applies `target^T * sketch` to its input.
  """

  def __init__(self,
               base_params_fn,
               base_model_num_params,
               target=None,
               sketch_size=100):
    """Constructs the InfSketchLayer.

    Args:
      base_params_fn: A callable returning a list of the base model parameters.
      base_model_num_params: The total number of base model parameters.
      target: An optional matrix of shape (sketch_size, base_model_num_params)
        that will be the target matrix for the sketch computation. The sketch
        computed will be Hessian^{-1} * target.
      sketch_size: the size of the sketch. Larger values give
        better accuracy but involve slower computation. Ignored when `target`
        is given; the number of rows of `target` is used instead.
    """
    super(InfSketchLayer, self).__init__()
    self._sketch_size = sketch_size
    self._base_params_fn = base_params_fn
    if target is None:
      # No target supplied: use a fixed (non-trainable) random normal matrix
      # with standard deviation 1 / sqrt(sketch_size).
      self.target = self.add_weight(
          name='target',
          shape=(self._sketch_size, base_model_num_params),
          dtype=tf.float32,
          initializer=tf.keras.initializers.random_normal(stddev=1.0 /
                                                          np.sqrt(sketch_size)),
          trainable=False)
    else:
      # Caller-supplied target: its row count overrides `sketch_size`.
      self.target = target
      self._sketch_size = target.shape[0]
    # The sketch has exactly the target's shape and is learned from zero.
    self.sketch = self.add_weight(
        name='sketch',
        shape=self.target.shape,
        initializer='zeros',
        trainable=True)

  def call(self, inputs, training=True):
    """Computes the sketch training loss or applies the sketch.

    inputs is expected to be the gradient of the loss.

    Args:
      inputs: when `training` is true, an indexable pair whose two elements
        are each reshaped to a column vector (two loss gradients); otherwise
        a matrix that can be left-multiplied by `sketch`.
      training: selects the loss computation (True, the default) or the
        inference computation (False).

    Returns:
      Training: the scalar loss reshaped to [1, -1].
      Inference: target^T * (sketch * inputs).
    """
    if training:
      # Row i of each product is the scalar <sketch[i], gradient>.
      grad1_x_sketch = tf.matmul(self.sketch, tf.reshape(inputs[0], [-1, 1]))
      grad2_x_sketch = tf.matmul(self.sketch, tf.reshape(inputs[1], [-1, 1]))
      base_params = self._base_params_fn()

      def loss_i(i):
        # Differentiating <sketch[i], gradient> w.r.t. the base parameters
        # gives, when `inputs` are loss gradients as the docstring expects,
        # the Hessian-vector product for sketch row i.
        ti = self.target[i]
        gs1 = grad1_x_sketch[i][0]
        hs1 = tf.cast(flatten(tf.gradients(gs1, base_params)), tf.float32)
        gs2 = grad2_x_sketch[i][0]
        hs2 = tf.cast(flatten(tf.gradients(gs2, base_params)), tf.float32)
        # Inner product of the two residuals, one per input gradient.
        return tf.tensordot(hs1 - ti, hs2 - ti, 1)

      # Accumulate loss_i over every sketch row inside a graph-level loop.
      _, loss = tf.while_loop(
          lambda i, _: i < self._sketch_size,
          lambda i, loss: (i + 1, loss + loss_i(i)),
          [tf.constant(0, tf.int32),
           tf.constant(0.0, tf.float32)])
      output = tf.reshape(loss, [1, -1])
    else:
      # Inference: project the input through the sketch, then back through
      # the transposed target.
      grad_x_sketch = tf.matmul(self.sketch, inputs)
      output = tf.matmul(tf.transpose(self.target), grad_x_sketch)
    return output


class LossGradientLayer(layers.Layer):
  """Keras layer that outputs the flattened gradient of the base loss."""

  def __init__(self, base_model_fn):
    """Stores the loss-producing callable.

    Args:
      base_model_fn: a callable mapping the layer inputs to a pair
        (loss, parameters) whose gradient is to be taken.
    """
    super().__init__()
    self._base_model_fn = base_model_fn

  def call(self, inputs):
    """Returns d(loss)/d(parameters) as a single rank-1 float32 tensor."""
    loss_and_params = self._base_model_fn(inputs)
    per_param_grads = tf.gradients(loss_and_params[0], loss_and_params[1])
    flat_grad = flatten(per_param_grads)
    return tf.cast(flat_grad, tf.float32)


def _identity_loss(_, y_pred):
  """Defines the identity loss function."""
  return y_pred


def _base_model_fns(base_model, loss_fn):
  """Helper function to construct base_model_fn and base_params_fn."""
  def base_model_fn(inputs):
    features = inputs[0]
    labels = inputs[1]
    preds = base_model(features, training=False)
    return loss_fn(labels, preds), base_model.trainable_variables

  def base_params_fn():
    return base_model.trainable_variables

  return base_model_fn, base_params_fn


def test_loss_grads(base_model, loss_fn, test_inputs):
  """Computes the per-example loss gradient at the given test points.

  Args:
    base_model: the Keras model whose loss is differentiated.
    loss_fn: a callable taking (labels, predictions) and returning the loss.
    test_inputs: a pair [features, labels] of arrays with one row per test
      point.

  Returns:
    A float32 numpy array of shape (num_points, base_model.count_params())
    whose row i is the flattened gradient of the loss at test point i.
  """
  test_features, test_labels = test_inputs[0], test_inputs[1]
  features_input = tf.keras.Input(shape=test_features.shape[1:])
  labels_input = tf.keras.Input(shape=test_labels.shape[1:])
  base_model_fn, _ = _base_model_fns(base_model, loss_fn)
  grad_layer = LossGradientLayer(base_model_fn)
  row_grad = tf.reshape(grad_layer([features_input, labels_input]), [1, -1])
  grad_model = tf.keras.Model(
      inputs=[features_input, labels_input], outputs=row_grad)

  num_points = test_features.shape[0]
  grads = np.zeros((num_points, base_model.count_params()), dtype=np.float32)
  # One predict call per point so each row is the gradient of one example.
  for idx in range(num_points):
    single_example = [[test_features[idx]], [test_labels[idx]]]
    grads[idx] = grad_model.predict(single_example)[0]
  return grads


class InfSketcher(object):
  """A helper class encapsulating two models: sketcher_model and inf_model.

  sketcher_model: computes the inverse Hessian sketch
  inf_model: computes influences, built by get_inf_model() or
    get_grads_inf_model().
  """

  def __init__(self,
               base_model,
               inputs,
               loss_fn,
               target=None,
               sketch_size=100,
               optimizer='sgd',
               sketch_path=None):
    """Builds and compiles the sketch-training model.

    Args:
      base_model: the model whose loss gradients are sketched.
      inputs: a pair [features_input, labels_input] of model inputs. Each is
        split into two equal halves along the first axis, so a batch must
        divide evenly by two.
      loss_fn: a callable taking (labels, predictions) and returning the loss.
      target: optional target matrix forwarded to InfSketchLayer.
      sketch_size: sketch size forwarded to InfSketchLayer.
      optimizer: optimizer passed to Keras compile().
      sketch_path: optional path of saved weights to load into the
        sketch-training model.
    """
    base_model_fn, base_params_fn = _base_model_fns(base_model, loss_fn)
    self._grad_layer = LossGradientLayer(base_model_fn)
    # Two half-batches give two separate loss gradients for the sketch loss.
    f0, f1 = tf.split(inputs[0], 2)
    l0, l1 = tf.split(inputs[1], 2)
    grad1 = self._grad_layer([f0, l0])
    grad2 = self._grad_layer([f1, l1])
    # NOTE(review): count_params() is used as the gradient length; this
    # presumably assumes every base-model parameter is trainable - confirm.
    self._is_layer = InfSketchLayer(base_params_fn, base_model.count_params(),
                                    target, sketch_size)
    train_loss = self._is_layer([grad1, grad2], training=True)
    # The model's output already is the loss, hence the identity loss.
    self.sketcher_model = tf.keras.Model(
        inputs=inputs, outputs=train_loss)
    self.sketcher_model.compile(optimizer=optimizer, loss=_identity_loss)
    if sketch_path is not None:
      self.sketcher_model.load_weights(sketch_path)

  def get_inf_model(self, test_input, train_inputs):
    """Influence computing model using a single test point as target.

    Args:
      test_input: the [features, labels] of the test point, passed to the
        gradient layer.
      train_inputs: the model inputs for the training point.

    Returns:
      A Keras model from train_inputs to the influence value, also stored as
      self.inf_model.
    """
    inf_grad = self._grad_layer(train_inputs)
    test_grad = self._grad_layer(test_input)
    invhess_x_test_grad = self._is_layer(
        tf.reshape(test_grad, [-1, 1]), training=False)
    influence = tf.reshape(tf.tensordot(inf_grad, invhess_x_test_grad, 1), [-1])
    self.inf_model = tf.keras.Model(
        inputs=train_inputs, outputs=influence)
    return self.inf_model

  def get_grads_inf_model(self, test_grads, train_inputs):
    """Influence computing model for target/test grads using the sketched inv hessian.

    Args:
      test_grads: matrix with one test gradient per row.
      train_inputs: the model inputs for the training point.

    Returns:
      A Keras model from train_inputs to a [1, num_test_inputs] row of
      influences, also stored as self.inf_model.
    """

    inf_grad = self._grad_layer(train_inputs)  # shape = [num_params]
    invhess_x_test_grads = self._is_layer(
        tf.transpose(test_grads),
        training=False)  # shape = [num_params, num_test_inputs]
    # influence of this training point over the test_grads provided.
    influence = tf.reshape(
        tf.matmul(tf.reshape(inf_grad, [1, -1]), invhess_x_test_grads),
        [1, -1])  # shape = [1, num_test_inputs]
    self.inf_model = tf.keras.Model(inputs=train_inputs, outputs=influence)
    return self.inf_model

  def target_inf_model(self, train_inputs):
    """Builds a model computing sketch * gradient for a training point.

    NOTE: the built model is assigned to `self.target_inf_model`, which
    replaces this method on the instance. After the first call,
    `obj.target_inf_model` is the Keras model rather than this builder, so
    the method can only be used once per instance.

    Args:
      train_inputs: the model inputs for the training point.

    Returns:
      A Keras model from train_inputs to the product of the sketch matrix and
      the loss gradient, reshaped to a single row.
    """
    inf_grad = self._grad_layer(train_inputs)
    sketch_x_grad = tf.reshape(
        tf.matmul(self._is_layer.sketch, tf.reshape(inf_grad, [-1, 1])),
        [1, -1])
    self.target_inf_model = tf.keras.Model(
        inputs=train_inputs, outputs=sketch_x_grad)
    return self.target_inf_model

  def save(self, path):
    """Saves the weights of the sketch-training model to `path`."""
    self.sketcher_model.save_weights(path)

# Influence on loss of test points

In [None]:
# Loss gradients of the base model at every dev point, one row per point.
test_points = [X_dev, Y_dev]
test_grads = test_loss_grads(model, tf.keras.losses.CategoricalCrossentropy(), test_points)
# Rebind as a graph constant; it is passed below as the `target` of InfSketcher.
test_grads = tf.constant(test_grads)

(100, 10)

In [None]:
# Symbolic inputs shared by the sketch-training and influence models. Shapes
# come from the config constants so they stay in sync with the data loader.
features_input = tf.keras.Input(shape=(WIDTH, WIDTH, NUM_CHANNELS))
labels_input = tf.keras.Input(shape=(NUM_CLASSES,))

In [None]:
# Location of the saved sketch weights for this trial.
sketch_path = BASE_DIR + '/tmp/mnist/original/{0}/sketcher_model.h5'.format(TRIAL)

# Reuse previously saved sketch weights when present; otherwise start from a
# fresh (zero) sketch. The two cases differ only in the weights path handed to
# InfSketcher, so the model is constructed in a single place.
have_saved_sketch = os.path.exists(sketch_path)
if have_saved_sketch:
  print('using sketch from {0}'.format(sketch_path))
else:
  print('sketch_path file does not exist, training a sketch')

my_is = InfSketcher(model,
                    [features_input, labels_input],
                    tf.keras.losses.CategoricalCrossentropy(),
                    target=test_grads,
                    optimizer='adam',
                    sketch_path=sketch_path if have_saved_sketch else None)
sketcher_model = my_is.sketcher_model

using sketch from drive/MyDrive/Colab Notebooks/SAME/sota/sota_mnist/tmp/mnist/original/1/sketcher_model.h5


In [None]:
# Train the sketch for 8 rounds of 10 epochs each.
for i in range(8):
  print('round {}'.format(i))
  # y=None: the model's output is itself the loss (it is compiled with
  # _identity_loss). InfSketcher splits each batch into two halves, so
  # batch_size must be even.
  sketcher_model.fit(x=[X_train, Y_train], y=None, verbose=1, epochs=10, batch_size=10000)
# Weights are written only once, after all rounds have finished.
sketcher_model.save_weights(sketch_path)
print('saved sketch to {0}'.format(sketch_path))

round 0
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
round 1
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
round 2
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
round 3
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
round 4
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
round 5
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
round 6
Train on 60000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
round 

In [None]:
# Print the layer structure of the sketch-training model.
my_is.sketcher_model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 28, 28, 1)]  0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 10)]         0           []                               
                                                                                                  
 tf_op_layer_split (TensorFlowO  [(None, 28, 28, 1),  0          ['input_3[0][0]']                
 pLayer)                         (None, 28, 28, 1)]                                               
                                                                                                  
 tf_op_layer_split_1 (TensorFlo  [(None, 10),        0           ['input_4[0][0]']          

In [None]:
# InfSketcher.target_inf_model() rebinds `my_is.target_inf_model` to the model
# it builds. On a re-run of this cell the attribute is therefore already the
# model, and calling it again would not build one; reuse it in that case.
if isinstance(my_is.target_inf_model, tf.keras.Model):
  target_inf_model = my_is.target_inf_model
else:
  target_inf_model = my_is.target_inf_model([features_input, labels_input])

# One output row per training example, one column per sketch row. The sketch
# has as many rows as its target (test_grads), i.e. one per dev point.
num_train_points = X_train.shape[0]
num_targets = int(test_grads.shape[0])
influences = np.zeros(shape=(num_train_points, num_targets), dtype=np.float32)
for i in range(num_train_points):
  if (i % 10000) == 0:
    print(i)
  # Predict one example at a time so each row is that example's own result.
  influences[i] = target_inf_model.predict([[X_train[i]], [Y_train[i]]])[0]
np.savez_compressed(BASE_DIR + '/tmp/mnist/original/{0}/dev_influences_inf_fun.npz'.format(TRIAL), influences)

0
10000
20000
30000
40000
50000
