### Import Relevant Libraries 

In [77]:
import numpy as np
import pandas as pd
import keras as k
import math
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.metrics import accuracy_score
from scipy.spatial.distance import cosine
from scipy.stats import ttest_ind
import matplotlib.pyplot as plt
from PIL import Image

### Process Dataset

In [78]:
# Load MNIST dataset
MNIST = k.datasets.mnist.load_data(path="mnist.npz")
training = MNIST[0]
testing = MNIST[1]
X = training[0]
X_test = testing[0]
Y = to_categorical(training[1])
Y_test = to_categorical(testing[1])
size = 28
# # Resize all MNIST images to be 224x224, same dimensions as Arun's architecture
# size = 224
# X = np.array([np.array(Image.fromarray(arr).resize([224,224])) for arr in X])
# X_test = np.array([np.array(Image.fromarray(arr).resize([224,224])) for arr in X_test])
# Reshape array to be rank 1
X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

### Define and compile model

In [79]:
model = k.Sequential([
    k.layers.InputLayer(input_shape=(size,size,1)),
    k.layers.Conv2D(filters=16, kernel_size=3),
    k.layers.MaxPool2D(pool_size=(2,2)),
    k.layers.Conv2D(filters=32, kernel_size=3),
    k.layers.MaxPool2D(pool_size=(2,2)),
    k.layers.Conv2D(filters=32, kernel_size=3),
    k.layers.Flatten(),
    k.layers.Dense(32, activation="relu"),
    k.layers.Dense(32, activation="relu"),
    k.layers.Dense(10, activation="softmax")
])

In [80]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_22 (Conv2D)           (None, 26, 26, 16)        160       
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 13, 13, 16)        0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 11, 11, 32)        4640      
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 5, 5, 32)          0         
_________________________________________________________________
conv2d_24 (Conv2D)           (None, 3, 3, 32)          9248      
_________________________________________________________________
flatten_5 (Flatten)          (None, 288)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 32)                9248      
__________

In [81]:
model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy')

In [84]:
# Train model
history = model.fit(x=X, y=Y, batch_size=200, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [85]:
predictions = model.predict_classes(X_test)
accuracy_score(predictions, testing[1])

0.966

### Extract and process intermediate layer output

In [166]:
layer_name = 'flatten_5' # replace this with the name of the flatten layer in the model summary
length = 1000
intermediate_layer_model = k.Model(inputs=model.input, outputs=model.get_layer(layer_name).output)
intermediate_output = intermediate_layer_model.predict(X[:length])
intermediate_output.shape

(1000, 288)

In [202]:
# Compute distance matrix
# TODO: Vectorize this for increased efficiency
euc_distance = np.zeros((len(intermediate_output),len(intermediate_output)))
cos_distance = np.zeros((len(intermediate_output),len(intermediate_output)))
for i,vector1 in enumerate(intermediate_output):
    for j,vector2 in enumerate(intermediate_output):
        euc_distance[i,j] = np.sqrt(np.sum(np.square(vector1-vector2)))
        cos_distance[i,j] = cosine(vector1, vector2)
pd.DataFrame(euc_distance)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
0,0.000000,343.083771,405.580688,417.323914,367.420441,316.960541,341.391144,306.712738,362.984344,390.370087,...,436.332825,350.960632,288.689819,410.071106,337.115784,383.825836,357.119934,378.370514,295.654572,416.338776
1,343.083771,0.000000,406.883942,381.076782,370.551727,342.177734,382.075714,382.953918,363.202454,363.782806,...,442.237549,376.203125,350.613159,378.474487,367.000214,362.745789,367.193634,233.188232,302.013672,394.452881
2,405.580688,406.883942,0.000000,329.284485,336.879578,385.915344,356.186676,383.708984,321.841766,285.868042,...,322.712433,349.016083,422.809509,311.752930,392.783112,342.853088,378.349518,377.383453,381.809570,397.033722
3,417.323914,381.076782,329.284485,0.000000,351.794952,361.716644,295.656372,405.681366,244.814896,287.139038,...,460.516052,438.006592,434.046234,126.306725,411.917908,412.113678,334.901215,337.339447,361.965271,312.515472
4,367.420441,370.551727,336.879578,351.794952,0.000000,297.381622,325.363617,321.989410,334.274048,272.014893,...,285.999725,309.955963,388.692200,374.142700,412.612915,305.416351,335.707062,359.671021,375.806183,345.588409
5,316.960541,342.177734,385.915344,361.716644,297.381622,0.000000,347.557648,298.060974,367.642731,318.687836,...,379.974945,369.178680,341.565460,375.025818,395.898071,371.645447,333.210602,313.515472,315.090240,307.664215
6,341.391144,382.075714,356.186676,295.656372,325.363617,347.557648,0.000000,382.987793,147.223038,305.877258,...,421.859344,369.697968,367.676910,288.559143,364.474731,393.090851,262.516571,383.022125,340.924500,348.942413
7,306.712738,382.953918,383.708984,405.681366,321.989410,298.060974,382.987793,0.000000,400.769104,346.400879,...,400.741669,382.281158,349.683380,424.129364,401.061676,399.515289,398.472595,370.802795,316.871918,412.360229
8,362.984344,363.202454,321.841766,244.814896,334.274048,367.642731,147.223038,400.769104,0.000000,288.618835,...,420.579498,375.937042,387.693359,223.547134,368.583710,391.356659,276.390442,361.229797,354.240570,346.150604
9,390.370087,363.782806,285.868042,287.139038,272.014893,318.687836,305.877258,346.400879,288.618835,0.000000,...,311.785614,338.672485,392.434723,303.161591,429.197418,322.975159,275.879395,287.995972,393.176514,295.985535


In [203]:
classes = [0,1,2,3,4,5,6,7,8,9]
targets = np.array([np.array([i for i,target in enumerate(training[1][:length]) if target == index]) for index in classes])
randoms = sorted(np.unique((length*np.random.rand(length//10)).astype(int)))

In [265]:
intra_target_euc_distances = [[] for _ in classes]
intra_target_cos_distances = [[] for _ in classes]
for index in classes:
    for row_index in targets[index]:
        for col_index in targets[index]:
            if row_index == col_index:
                continue
            intra_target_euc_distances[index].append(euc_distance[row_index,col_index])
            intra_target_cos_distances[index].append(cos_distance[row_index,col_index])

random_euc_distance = []
random_cos_distance = []
for random1 in randoms:
    for random2 in randoms:
        if random1 == random2:
            continue
        random_euc_distance.append(euc_distance[random1,random2])
        random_cos_distance.append(cos_distance[random1,random2])

In [277]:
for index in classes:
    print("Average euclidean distance between {}: {}".format(index, np.mean(intra_target_euc_distances[index])))
print("Average euclidean distance between random digits: {}".format(np.mean(random_euc_distance)))

Average euclidean distance between 0: 286.8153260303117
Average euclidean distance between 1: 197.25499489711322
Average euclidean distance between 2: 326.5025352048668
Average euclidean distance between 3: 293.796826120513
Average euclidean distance between 4: 276.2175002367069
Average euclidean distance between 5: 311.1892101526602
Average euclidean distance between 6: 279.52047146160305
Average euclidean distance between 7: 275.7591986854344
Average euclidean distance between 8: 274.654030129668
Average euclidean distance between 9: 261.4680284565627
Average euclidean distance between random digits: 358.95139963867217


In [278]:
for index in classes:
    print("Average cosine distance between {}: {}".format(index, np.mean(intra_target_cos_distances[index])))
print("Average cosine distance between random digits: {}".format(np.mean(random_cos_distance)))

Average cosine distance between 0: 0.22541629231800533
Average cosine distance between 1: 0.2280218460123757
Average cosine distance between 2: 0.2745668928332241
Average cosine distance between 3: 0.20354084944323772
Average cosine distance between 4: 0.24005617598692577
Average cosine distance between 5: 0.24393367553748302
Average cosine distance between 6: 0.19749205619321755
Average cosine distance between 7: 0.24990448725458939
Average cosine distance between 8: 0.17984430675549928
Average cosine distance between 9: 0.1947903699947126
Average cosine distance between random digits: 0.36962282902534876
