In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist, cifar10
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Input, Activation, BatchNormalization, UpSampling2D, Reshape
from tensorflow.keras.optimizers import Adam, SGD
from sklearn.metrics import mean_squared_error

In [None]:
# --- Dataset description -------------------------------------------------
NUM_CLASSES = 10     # number of label categories
WIDTH = 32           # image side length; used for both spatial axes when reshaping
NUM_CHANNELS = 3     # colour channels per image
NUM_TRAIN = 50000    # size of the training split
NUM_TEST = 10000     # size of the held-out split
NUM_DEV = 100        # size of the fixed dev subset

# --- Locations -----------------------------------------------------------
DATASET = "cifar10"
# Root folder for checkpoints and saved arrays. NOTE: while this is empty,
# every path derived from it starts with "/" (i.e. at the filesystem root).
BASE_DIR = ""

TRIAL = 1
# Despite the name, this is the path of a single .h5 checkpoint *file*; it is
# handed straight to keras.models.load_model further down.
MODEL_DIR = f"{BASE_DIR}/tmp/{DATASET}/original/{TRIAL}/model_ckpt_11.h5"

In [None]:
def _normalize(X):
  assert X.dtype == np.uint8
  X = X.astype(np.float64)
  X /= 255
  return X

def get_one_hot(targets, nb_classes):
  """One-hot encode integer class labels.

  Args:
    targets: integer labels in ``[0, nb_classes)``; an ndarray of any shape or
      any array-like (list, tuple, scalar) that ``np.asarray`` accepts.
    nb_classes: number of classes, i.e. the length of each one-hot vector.

  Returns:
    A float array of shape ``targets.shape + (nb_classes,)`` with a single 1.0
    per label along the last axis.
  """
  # Convert once so that both the indexing and the shape lookup below work for
  # plain Python sequences, not only for ndarrays.
  targets = np.asarray(targets)
  # Row i of the identity matrix is the one-hot vector for class i.
  res = np.eye(nb_classes)[targets.reshape(-1)]
  return res.reshape(list(targets.shape) + [nb_classes])

def load_standard_cifar10():
  """Load CIFAR-10 through Keras and return model-ready arrays.

  Images of both splits are reshaped to ``(N, WIDTH, WIDTH, NUM_CHANNELS)`` and
  scaled by ``_normalize``; labels are cast to ``np.int32``.

  Returns:
    ``(X_train, Y_train, X_validation, Y_validation)`` where the "validation"
    pair is the second split returned by ``cifar10.load_data()``.
  """
  train_split, held_out_split = tf.keras.datasets.cifar10.load_data()

  def _prepare(images, labels):
    # Same treatment for either split: fix the image layout, rescale the
    # pixels, and give the labels a fixed integer width.
    images = images.reshape(images.shape[0], WIDTH, WIDTH, NUM_CHANNELS)
    return _normalize(images), labels.astype(np.int32)

  X_train, Y_train = _prepare(*train_split)
  X_validation, Y_validation = _prepare(*held_out_split)

  return X_train, Y_train, X_validation, Y_validation

def load_cifar10_train_dev(num_dev=100):
  """Return the CIFAR-10 training split plus a fixed, reproducible dev subset.

  The dev examples are taken from the held-out split returned by
  ``load_standard_cifar10`` using a frozen list of indices, so every run sees
  the same dev points.

  Args:
    num_dev: how many of the frozen indices to use (the first ``num_dev`` of
      them). ``None`` means "all". The default of 100 uses the whole list.

  Returns:
    ``(X_train, Y_train, X_dev, Y_dev)``.

  Raises:
    ValueError: if ``num_dev`` is negative or larger than the number of frozen
      indices available.
  """
  # Indices were drawn once at random and then fixed for future runs
  # (TracIn-like strategy, but their indices are available only for MNIST):
  #   selected_dev = np.random.randint(0, X_validation.shape[0], num_dev)
  selected_dev = [5214, 2304, 5947, 9428, 2717, 8296, 7736, 8291, 5235, 54,
                  7499, 9590, 3675, 1932, 6646, 8719, 6484, 6306, 3066, 2442,
                  6106, 1949, 4320,  541, 1318, 5967, 2773, 3847, 1152, 9937,
                  7469, 5982, 7644, 5820, 8152, 9518,  601, 3953, 4931, 1924,
                  5342, 5467, 6718, 6779, 2860, 2440, 5480, 1178,  222, 7909,
                  6394, 3511, 8729, 6261, 7192, 9453, 5257, 9077, 6419, 3280,
                  3725, 3601, 8174, 5703, 4954, 9536, 4783, 2234, 7365, 2405,
                  3073, 2780, 7461, 3525, 7573, 6764, 9962, 7527,  992,  315,
                  6260, 9061,  592, 8003, 7594, 1930, 7215, 5124, 7531, 9471,
                  2824, 3533, 6062, 3946, 5246, 4440,  414, 3572, 4899, 884]

  if num_dev is None:
    num_dev = len(selected_dev)
  # Validate before loading so a bad request fails fast, without touching the
  # dataset at all.
  if not 0 <= num_dev <= len(selected_dev):
    raise ValueError(
        f"num_dev must be between 0 and {len(selected_dev)}, got {num_dev}")

  X_train, Y_train, X_validation, Y_validation = load_standard_cifar10()

  # Honour num_dev by taking a prefix of the frozen list; the default keeps
  # the complete list, i.e. the previous behaviour.
  dev_indices = selected_dev[:num_dev]
  X_dev = X_validation[dev_indices]
  Y_dev = Y_validation[dev_indices]
  return X_train, Y_train, X_dev, Y_dev

In [None]:
def set_max_to_one(a):
    """Turn each row of ``a`` into a hard one-hot vector at its arg-max.

    Args:
        a: 2-D array of scores, one row per example.

    Returns:
        Float array of the same shape as ``a`` holding 1.0 at each row's
        maximum (first occurrence on ties) and 0.0 elsewhere.
    """
    winners = np.expand_dims(a.argmax(axis=1), 1)  # column vector of arg-max indices
    columns = np.arange(a.shape[1])
    # Broadcasting the comparison marks exactly one column per row.
    return np.equal(winners, columns).astype(float)

In [None]:
# Load the training split and the fixed dev subset. Pass the configured NUM_DEV
# explicitly rather than relying on the function's own default value.
X_train, Y_train, X_dev, Y_dev = load_cifar10_train_dev(num_dev=NUM_DEV)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [None]:
# Drop singleton axes from the label arrays so each becomes a flat vector of
# class ids. Re-running this cell is harmless: squeezing twice changes nothing.
Y_train, Y_dev = (np.squeeze(labels) for labels in (Y_train, Y_dev))

In [None]:
# Restore the trained checkpoint, then score the training split and the dev
# subset (in that order) with it.
model = keras.models.load_model(MODEL_DIR)
Y_pred_tr, Y_pred_dev = (model.predict(split) for split in (X_train, X_dev))

In [None]:
# load_standard_cifar10() returns (X_train, Y_train, X_held_out, Y_held_out);
# only the last two items are needed here. Labels are flattened to a vector
# before the held-out images are scored with the loaded model.
X_test, Y_test = load_standard_cifar10()[2:]
Y_test = np.squeeze(Y_test)
Y_pred_te = model.predict(X_test)

In [None]:
# using prediction as embedding
def get_similarity_pred(m, x, X_train):
  """Distance between one example and every training example in prediction space.

  The model's output (``m.predict``) is used as the embedding.

  Args:
    m: object exposing ``predict(batch)``, e.g. the loaded Keras model.
    x: a single data point (no batch axis); a batch axis is added internally.
    X_train: the (entire) training set, one example per row.

  Returns:
    Array with one Euclidean distance per training example. Despite the
    function's name, *smaller* values mean *more* similar; callers negate the
    result to obtain an influence-style score.
  """
  X_emb_tr = m.predict(X_train)
  x_emb = m.predict(x[np.newaxis,:])

  # Euclidean (L2) distance along the embedding axis; can be another distance
  # as well. x_emb has a leading axis of size 1 and broadcasts over all rows.
  similarity = np.linalg.norm(X_emb_tr - x_emb, axis=1)
  return similarity

In [None]:
# using penultimate layer as embedding
def get_similarity_penult(m, x, X_train):
  """Distance between one example and every training example in feature space.

  The output of ``m.layers[-3]`` is used as the embedding.

  Args:
    m: Keras model exposing ``inputs`` and ``layers``.
    x: a single data point (no batch axis); a batch axis is added internally.
    X_train: the (entire) training set, one example per row.

  Returns:
    Array of Euclidean distances computed along axis 1 of the embeddings.
    Despite the function's name, *smaller* values mean *more* similar; callers
    negate the result to obtain an influence-style score.
  """
  # Penultimate layer output. Assumes the last two layers of `m` are the logit
  # layer and the softmax, so layers[-3] is the feature layer feeding them.
  m_p = tf.keras.models.Model(m.inputs, m.layers[-3].output)

  X_emb_tr = m_p.predict(X_train)
  x_emb = m_p.predict(x[np.newaxis,:])

  # Euclidean (L2) distance along the embedding axis; can be another distance
  # as well. x_emb has a leading axis of size 1 and broadcasts over all rows.
  similarity = np.linalg.norm(X_emb_tr - x_emb, axis=1)
  return similarity

In [None]:
# Prediction-space influence: negative Euclidean distance between each dev
# point's prediction and every training prediction (one row per dev point).
# Y_pred_tr / Y_pred_dev were already produced by model.predict above, so they
# are reused here instead of re-scoring the whole training set per dev point.
pred_sim = [np.linalg.norm(Y_pred_tr - y_dev, axis=1) for y_dev in Y_pred_dev]
pred_infl = -1*np.array(pred_sim)
np.save(BASE_DIR + '/pred_infl.npy', pred_infl, allow_pickle=True)

In [None]:
# Feature-space influence: negative Euclidean distance between each dev
# point's penultimate-layer embedding and every training embedding.
# The feature extractor and both embedding matrices are computed once up
# front; only the cheap distance computation runs per dev point.
# Assumes the model's last two layers are logit + softmax, so layers[-3] is
# the penultimate feature layer.
penult_model = tf.keras.models.Model(model.inputs, model.layers[-3].output)
penult_emb_tr = penult_model.predict(X_train)
penult_emb_dev = penult_model.predict(X_dev)

penult_sim = [np.linalg.norm(penult_emb_tr - emb_dev, axis=1) for emb_dev in penult_emb_dev]
penult_infl = -1*np.array(penult_sim)
np.save(BASE_DIR + '/penult_infl.npy', penult_infl, allow_pickle=True)