In [None]:
import tensorflow.compat.v1 as tf
tf.enable_eager_execution()
from tensorflow import keras
import numpy as np
import random
import os
import gc
import time
import platform

from matplotlib import pyplot as plt

In [None]:
# Prefix for the .npy files saved/loaded further down; those paths are built
# as BASE_DIR + '/<name>.npy'.
# NOTE(review): with the empty string the resulting paths start at the
# filesystem root (e.g. '/pred_infl.npy') -- confirm that is intended.
BASE_DIR = ""
NUM_CLASSES = 10
# Side length used when reshaping images to (N, WIDTH, WIDTH, 1).
WIDTH = 28
NUM_CHANNELS = 1

def _normalize(X):
  assert X.dtype == np.uint8
  X = X.astype(np.float64)
  X /= 255
  return X

def load_standard_mnist():
  """Load MNIST through Keras and return model-ready arrays.

  Returns:
    (X_train, Y_train, X_validation, Y_validation). Each image array is
    reshaped to (N, WIDTH, WIDTH, 1) and passed through `_normalize`
    (float64, values divided by 255); each label array is cast to int32.
    "Validation" here is the second split returned by `load_data`.
  """
  train_split, validation_split = tf.keras.datasets.mnist.load_data(path='mnist.npz')

  def _prepare(images, labels):
    # Add a trailing single-channel axis before scaling the pixel values.
    images = images.reshape(images.shape[0], WIDTH, WIDTH, 1)
    return _normalize(images), labels.astype(np.int32)

  X_train, Y_train = _prepare(*train_split)
  X_validation, Y_validation = _prepare(*validation_split)
  return X_train, Y_train, X_validation, Y_validation

def load_mnist_train_dev():
  """Return the MNIST training split plus a small fixed dev subset.

  The dev subset is taken from the validation arrays returned by
  `load_standard_mnist()` by indexing them with the hard-coded
  `selected_dev` list below.

  Returns:
    (X_train, Y_train, X_dev, Y_dev), where X_dev / Y_dev hold one row per
    entry of `selected_dev`, in list order.
  """
  # these were randomly picked, then fixed for future runs
  # NOTE(review): the list has 100 entries but index 7327 appears twice, so
  # the dev set contains that validation example twice. Left untouched here
  # because downstream results may depend on this exact list -- confirm.
  selected_dev = [8106, 9910, 3397, 8870, 2103, 5689, 9799, 4037, 1584, 1160, 9063,
       1332, 3043, 8307, 1042, 3466, 7772, 7327, 7098, 7216, 8624, 6400,
       5811, 1862, 7327, 1626, 5958, 3868, 3795,  836, 3406, 5570, 9535,
       9653, 7890, 5671, 2451, 9175, 8310, 2425, 5923, 2797, 1150, 6012,
       8666, 8849, 6839, 5994, 6751, 9139, 9648, 8898, 9869, 2184, 1363,
       8294, 4000, 5424, 4544,  330, 4325, 4597, 4735, 9966, 2342, 7220,
       5774, 3437, 4276,  760, 7868, 2993, 6262, 8880, 6017, 5045, 9513,
       4084, 7115, 5775,  358, 3549, 2612, 8973, 6747,  415, 8573, 9973,
       2734,  586, 3937, 6889, 1191, 5255, 1460,  413, 7257, 5272, 7402,
       7968]
  X_train, Y_train, X_validation, Y_validation = load_standard_mnist()
  # Integer-list indexing selects the listed rows (duplicates included).
  X_dev = X_validation[selected_dev]
  Y_dev = Y_validation[selected_dev]
  return X_train, Y_train, X_dev, Y_dev

In [None]:
# Load the MNIST training split and the fixed dev subset selected inside
# load_mnist_train_dev(); these globals are used by the similarity cells below.
X_train, Y_train, X_dev, Y_dev = load_mnist_train_dev()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# using prediction as embedding
def euclidean_dist(X, y, axis=1):
    """Euclidean (L2) distance between X and y, reduced along `axis`.

    `X - y` follows NumPy broadcasting; the squared differences are summed
    over `axis` (default 1) and the square root of that sum is returned.
    """
    delta = X - y
    squared = delta * delta
    return np.sqrt(squared.sum(axis=axis))

def get_similarity(m, x, X_train, X_emb_tr=None):
  """Distance from one data point to every training point in m's output space.

  The model's prediction is used as the embedding.

  Args:
    m: model exposing `predict`.
    x: single data point; a leading batch axis is added before predicting.
    X_train: (entire) training set. Only embedded here when `X_emb_tr` is
      None.
    X_emb_tr: optional precomputed `m.predict(X_train)`. Supplying it avoids
      re-embedding the whole training set on every call.

  Returns:
    Euclidean distance between each training embedding and the embedding of
    `x`, reduced over axis 1. Despite the function name this is a distance:
    smaller values mean more similar.
  """
  if X_emb_tr is None:
    X_emb_tr = m.predict(X_train)
  x_emb = m.predict(x[np.newaxis,:])

  # can be another distance as well
  return euclidean_dist(X_emb_tr, x_emb, axis=1)

# The training-set embedding does not depend on the query point, so compute it
# once here instead of once per dev example inside the comprehension.
# NOTE(review): `model` is not defined in the cells shown here; it must already
# exist in the kernel before this cell runs.
train_pred_emb = model.predict(X_train)
latexp_sim = [get_similarity(model, x, X_train, X_emb_tr=train_pred_emb) for x in X_dev]

In [None]:
# Stack the per-dev-example distance vectors and flip their sign, so that
# training points closer to a dev example receive a larger score.
latexp_infl = np.negative(np.asarray(latexp_sim))
latexp_infl.shape

In [None]:
# Persist the prediction-embedding scores to BASE_DIR + '/pred_infl.npy'.
# NOTE(review): if BASE_DIR is the empty string this path is '/pred_infl.npy'
# at the filesystem root -- confirm BASE_DIR is set as intended.
np.save(BASE_DIR + '/pred_infl.npy', latexp_infl, allow_pickle=True)

In [None]:
# Reload the array from the same path expression used by the np.save cell.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code for untrusted files; presumably unnecessary if the
# saved array is purely numeric -- confirm and consider dropping it.
pred_infl = np.load(BASE_DIR + '/pred_infl.npy', allow_pickle=True)

In [None]:
from tensorflow.keras.models import Model

# using penultimate layer as embedding
# NOTE(review): this repeats the earlier definition of the same name in this
# notebook; the later one silently shadows the earlier. Consider keeping one.
def euclidean_dist(X, y, axis=1):
    """Euclidean (L2) distance between X and y, reduced along `axis`.

    `X - y` follows NumPy broadcasting; the squared differences are summed
    over `axis` (default 1) and the square root of that sum is returned.
    """
    delta = X - y
    squared = delta * delta
    return np.sqrt(squared.sum(axis=axis))

def get_similarity(m, x, X_train, X_emb_tr=None, m_p=None):
  """Distance from one data point to every training point, using the output
  of `m.layers[-3]` as the embedding.

  NOTE(review): this redefines `get_similarity` from the prediction-embedding
  cell earlier in the notebook; after this cell runs, the name refers to this
  version.

  Args:
    m: Keras model; `m.inputs` and `m.layers[-3].output` define the embedding
      model when `m_p` is not given.
    x: single data point; a leading batch axis is added before predicting.
    X_train: (entire) training set. Only embedded here when `X_emb_tr` is
      None.
    X_emb_tr: optional precomputed `m_p.predict(X_train)`. Supplying it avoids
      re-embedding the whole training set on every call.
    m_p: optional prebuilt embedding model. Supplying it avoids constructing
      a new `Model` on every call.

  Returns:
    Euclidean distance between each training embedding and the embedding of
    `x`, reduced over axis 1; smaller values mean more similar.
    Assumes the embedding is 2-D (samples, features) so that the axis-1
    reduction yields one distance per training point -- TODO confirm for the
    model in use.
  """
  if m_p is None:
    # penultimate layer output (last 2 layers here for logit and softmax)
    m_p = Model(m.inputs, m.layers[-3].output)

  if X_emb_tr is None:
    X_emb_tr = m_p.predict(X_train)
    print(X_emb_tr.shape)
  x_emb = m_p.predict(x[np.newaxis,:])

  # can be another distance as well
  return euclidean_dist(X_emb_tr, x_emb, axis=1)

# Neither the truncated model nor the training-set embedding depends on the
# query point, so build/compute them once instead of once per dev example.
# NOTE(review): `model` is not defined in the cells shown here; it must already
# exist in the kernel before this cell runs.
penult_model = Model(model.inputs, model.layers[-3].output)
penult_emb_tr = penult_model.predict(X_train)
print(penult_emb_tr.shape)
penult_sim = [
    get_similarity(model, x, X_train, X_emb_tr=penult_emb_tr, m_p=penult_model)
    for x in X_dev
]

In [None]:
# Stack the per-dev-example distance vectors and flip their sign, so that
# training points closer to a dev example receive a larger score.
penult_infl = -1*np.array(penult_sim)
# Persist the penultimate-layer scores to BASE_DIR + '/penult_infl.npy'.
np.save(BASE_DIR + '/penult_infl.npy', penult_infl, allow_pickle=True)
# Keep the shape as the cell's final expression so the notebook displays it;
# a bare expression in the middle of a cell is evaluated and then discarded.
penult_infl.shape