# Convolutional Beta-Variational AutoEncoder ($\beta\text{-VAE}$)

### This notebook contains a TensorFlow Keras-based Convolutional Beta-Variational Auto-Encoder (built with a mixture of layer subclassing and the functional API) trained on [Fashion MNIST](https://research.zalando.com/welcome/mission/research-projects/fashion-mnist/)

In [None]:
# Request TensorFlow 2.x via a notebook line magic (presumably the Google Colab
# `%tensorflow_version` magic -- TODO confirm). Any failure is deliberately
# ignored so the notebook can continue in environments without that magic.
try:
    %tensorflow_version 2.x
except:
    pass

import os
import random

import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.gridspec as gs
import numpy as np
import seaborn as sns; sns.set()
import sklearn

In [None]:
import logging
# Only let ERROR-level (and more severe) records through TensorFlow's logger.
logger = tf.get_logger()
logger.setLevel(logging.ERROR)

In [None]:
# verify gpu: print the list of local devices reported by TensorFlow
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

In [None]:
###### Constants ######
BATCH_SIZE=256
MAX_EPOCHS=5
PERCENT_VAL=0.2 # fraction (0-1) of the test data that split_data holds out for validation

# Autoencoder Parameters
LATENT_DIM = 128  # passed to Encoder as the size of the mu / logvar heads
INPUT_SHAPE = (28, 28, 1)  # shape of the Keras Input (one image, trailing channel axis)
BETA = 1.2  # Penalty coefficient on KL divergence (used to pressure the latent representations to be disentangled)

# Save/Load Model Constants
SAVE_PATH = os.path.join('..', 'models', 'cvae')

# Run-mode switches read by later cells
LOADING_WEIGHTS = False  # load weights from SAVE_PATH (only checked when SAVING_WEIGHTS is False)
SAVING_WEIGHTS = False  # save weights to SAVE_PATH in the save/load cell
TRAINING = True  # run the pre-training preview and model.fit

In [None]:
###### Function Definitions ######
def normalize(images, labels=None):
    """Prepare raw images for the autoencoder.

    A trailing channel axis is appended, the values are cast to float32 and
    divided by 255.

    Args:
        images: image tensor/array without a channel axis.
        labels: accepted so the function can be used with two-element dataset
            entries; it is ignored.

    Returns:
        A pair (scaled, scaled): the same scaled tensor is returned as both
        model input and reconstruction target.
    """
    with_channel = tf.expand_dims(images, -1)
    scaled = tf.cast(with_channel, tf.float32) / 255

    return scaled, scaled

def display_image(image):
    """Show a single 28x28 image in a small figure with a colorbar.

    Args:
        image: a NumPy array, or an object exposing ``.numpy()``, holding
            28*28 values (it is reshaped to (28, 28)).
    """
    if type(image) is not np.ndarray:
        image = image.numpy()
    pixels = image.reshape((28,28))

    plt.figure(figsize=(2,2))
    plt.imshow(pixels, cmap=plt.cm.binary)
    plt.colorbar()
    plt.grid(False)
    plt.show()

def display_images(images, rows, cols, dpi=128, wspace=0, hspace=0, labels=None):
    """Draw a grid of single-channel images and return the figure.

    Args:
        images: iterable of images, each a NumPy array or an object exposing
            ``.numpy()``; the trailing (channel) axis is dropped before drawing.
        rows: number of grid rows.
        cols: number of grid columns.
        dpi: resolution passed to ``plt.figure``.
        wspace: horizontal spacing between grid cells.
        hspace: vertical spacing between grid cells.
        labels: optional sized sequence (list, range, NumPy array, ...) of
            titles; image ``i`` is titled ``labels[i]``. Images beyond the end
            of ``labels`` are drawn without a title.

    Returns:
        The matplotlib figure that was created.
    """
    fig = plt.figure(dpi=dpi)

    spec = gs.GridSpec(rows, cols)
    spec.update(wspace=wspace, hspace=hspace)

    # Use an explicit length instead of truthiness: `if labels:` raises for
    # NumPy arrays with more than one element.
    n_labels = len(labels) if labels is not None else 0

    for i, image in enumerate(images):
        image = image if type(image) is np.ndarray else image.numpy()
        image = image.reshape(image.shape[:-1])  # remove channel
        ax = fig.add_subplot(spec[i])

        if i < n_labels:
            ax.set_title(labels[i])

        ax.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        ax.imshow(image, cmap=plt.cm.binary)

    plt.show()

    return fig
    
def split_data(data, labels, percent):
    """Randomly partition ``data``/``labels`` into two disjoint subsets.

    A random permutation of the row indices is drawn with ``np.random``; the
    first ``ceil(n * percent)`` indices form the validation subset and the
    rest form the remaining (test) subset.

    Args:
        data: array indexed along axis 0.
        labels: array indexed with the same row indices as ``data``.
        percent: fraction of rows assigned to the validation subset.

    Returns:
        (rest_data, rest_labels, val_data, val_labels)
    """
    n_rows = data.shape[0]
    shuffled = np.random.permutation(n_rows)

    n_val = int(np.ceil(n_rows * percent))
    val_ndxs, test_ndxs = shuffled[:n_val], shuffled[n_val:]

    return data[test_ndxs], labels[test_ndxs], data[val_ndxs], labels[val_ndxs]

def plot_history(history, metrics, figsize=(15,10)):
    """Plot training-history curves, one subplot per group of metrics.

    Args:
        history: object with an ``epoch`` sequence and a ``history`` mapping
            from metric name to per-epoch values (as returned by ``model.fit``).
        metrics: sequence of groups; each group is an iterable of metric names
            drawn together in one subplot, e.g. ``[('loss', 'val_loss')]``.
        figsize: figure size passed to ``plt.figure``.
    """
    plt.figure(figsize=figsize)

    # Materialise the groups so the column count is the number of groups.
    # (Previously the loop variable shadowed `metrics`, so the grid width was
    # the size of the current group instead.)
    groups = list(metrics)
    n_groups = len(groups)

    for i, group in enumerate(groups):
        plt.subplot(1, n_groups, i + 1)
        for metric in group:
            plt.plot(range(len(history.epoch)), history.history[metric], label=metric)
        plt.legend(loc='upper right')

In [None]:
# Load Fashion MNIST. The labels are not used to train the autoencoder, but
# ytest is used later to pick exemplars per class.
(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.fashion_mnist.load_data()

# Hold out PERCENT_VAL of the test set as a validation set; the rest stays as test data.
xtest, ytest, xval, yval = split_data(xtest, ytest, PERCENT_VAL)

n_training = xtrain.shape[0]
n_test = xtest.shape[0]
n_val = xval.shape[0]

In [None]:
# Each pipeline maps `normalize` over (image, image) pairs, so every element is
# (scaled image, scaled image): the input doubles as the reconstruction target.
# train_ds is shuffled and repeated indefinitely, which is why model.fit is given steps_per_epoch.
train_ds = tf.data.Dataset.from_tensor_slices((xtrain, xtrain)).map(normalize).cache().shuffle(n_training//10).batch(BATCH_SIZE).prefetch(buffer_size=tf.data.experimental.AUTOTUNE).repeat()
# Validation and test pipelines are neither shuffled nor repeated.
val_ds = tf.data.Dataset.from_tensor_slices((xval, xval)).map(normalize).cache().batch(BATCH_SIZE).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
test_ds = tf.data.Dataset.from_tensor_slices((xtest, xtest)).map(normalize).cache().batch(BATCH_SIZE)

## Visualizing a Few Exemplars from the **TEST** Data Set

In [None]:
# Take the first test batch and show its first 64 images in a 4x16 grid.
# The titles are simply the positions of the images within the batch.
images, _ = next(iter(test_ds))
fig = display_images(images[0:64], rows=4, cols=16, dpi=196, labels=range(images.shape[0]))

# Defining the $\beta\text{-VAE}$ Model


## The model contains the following components:

<h3>
<ol>
    <li> Encoder </li>
    <li> Latent Vector Sampler </li>
    <li> Decoder </li>
    <li> Loss Function (Reconstruction Loss + KL-Divergence) </li>
</ol>

    Note: Our loss function assumes a standard normal prior $p(z)$ and encourages the Gaussian posterior approximation $q(z|x)$ to stay close to it.
    
</h3>

In [None]:
class Encoder(tf.keras.layers.Layer):
    """Convolutional encoder producing the parameters of the latent Gaussian.

    Three Conv2D + MaxPool2D stages are followed by a flatten and two parallel
    Dense heads that output the mean and the log-variance of q(z|x).

    Args:
        latent_dim: number of units in each Dense head (size of the latent vector).
    """
    def __init__(self, latent_dim):
        super(Encoder, self).__init__()
        
        self.latent_dim = latent_dim
        
        # layers
        self.conv1 = tf.keras.layers.Conv2D(
          filters=64, 
          kernel_size=2, 
          strides=(1, 1), 
          padding='same', 
          activation='relu', 
          name='encoder/conv1')
        self.maxpool1 = tf.keras.layers.MaxPool2D(name='encoder/maxpool1')
        self.conv2 = tf.keras.layers.Conv2D(
          filters=32, 
          kernel_size=3, 
          strides=(1, 1), 
          padding='same', 
          activation='relu', 
          name='encoder/conv2')
        self.maxpool2 = tf.keras.layers.MaxPool2D(name='encoder/maxpool2')
        self.conv3 = tf.keras.layers.Conv2D(
          filters=32, 
          kernel_size=4, 
          strides=(1, 1), 
          padding='same', 
          activation='relu', 
          name='encoder/conv3')
        self.maxpool3 = tf.keras.layers.MaxPool2D(name='encoder/maxpool3')
        self.flatten = tf.keras.layers.Flatten(name='encoder/flatten')        
        self.logvar = tf.keras.layers.Dense(latent_dim, name='encoder/logvar')
        self.mu = tf.keras.layers.Dense(latent_dim, name='encoder/mu')
        # sigma = exp(0.5 * logvar), i.e. the standard deviation implied by the log-variance
        self.sigma = tf.keras.layers.Lambda(lambda t: tf.keras.backend.exp(.5*t), name='encoder/sigma')
        
    def call(self, inputs):
        """Encode ``inputs`` and return the tuple ``(mu, logvar, sigma)``."""
        x = self.conv1(inputs)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.maxpool2(x)
        x = self.conv3(x)
        x = self.maxpool3(x)
        x = self.flatten(x)

        mu = self.mu(x)
        # Evaluate the log-variance head once and reuse it for sigma, instead
        # of running the same Dense layer twice per forward pass.
        logvar = self.logvar(x)
        return mu, logvar, self.sigma(logvar)


In [None]:
class Sampler(tf.keras.layers.Layer):
    """Reparameterised sampling layer: z = mu + sigma * epsilon.

    ``call`` expects a pair ``(mu, sigma)`` and draws ``epsilon`` from
    ``tf.keras.backend.random_normal`` with the (batch, dim) shape of ``mu``.
    """
    def __init__(self):
        super().__init__()

    def call(self, inputs):
        mean, std = inputs

        mean_shape = tf.shape(mean)
        n_batch, n_dim = mean_shape[0], mean_shape[1]

        # Gaussian noise, one value per latent coordinate
        noise = tf.keras.backend.random_normal(shape=(n_batch, n_dim))

        return mean + std * noise

In [None]:
class Decoder(tf.keras.layers.Layer):
    """Decoder mapping a latent vector back to a single-channel image.

    A Dense layer expands the input to 7*7*64 units, which are reshaped to
    (7, 7, 64) and passed through two stride-2 transposed convolutions and a
    final stride-1 transposed convolution with a sigmoid activation.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # layers (created in the order in which `call` applies them)
        self.dense1 = tf.keras.layers.Dense(
            units=7*7*64, activation=tf.nn.relu, name='decoder/dense1')
        self.reshape1 = tf.keras.layers.Reshape(
            target_shape=(7, 7, 64), name='decoder/reshape1')
        self.trans_conv1 = tf.keras.layers.Conv2DTranspose(
            filters=64, kernel_size=2, strides=(2, 2),
            padding='same', activation='relu', name='decoder/deconv1')
        self.trans_conv2 = tf.keras.layers.Conv2DTranspose(
            filters=32, kernel_size=2, strides=(2, 2),
            padding='same', activation='relu', name='decoder/deconv2')
        self.trans_conv3 = tf.keras.layers.Conv2DTranspose(
            filters=1, kernel_size=2, strides=(1, 1),
            padding='same', activation='sigmoid', name='decoder/deconv3')

    def call(self, inputs):
        """Apply the decoder stack to ``inputs`` and return the result."""
        pipeline = (
            self.dense1,
            self.reshape1,
            self.trans_conv1,
            self.trans_conv2,
            self.trans_conv3,
        )
        out = inputs
        for layer in pipeline:
            out = layer(out)
        return out

# VAE Loss Function
<h2>
$
\begin{align}
\DeclareMathOperator{\ExpectedVal}{\mathbf{E}}
\DeclareMathOperator{\log}{\operatorname{log}}
\mathcal{L}(\theta, \phi; \mathbf{x}^{(i)}) = -D_{KL}(q_\phi(\mathbf{z} \vert \mathbf{x}^{(i)}) \| p_\theta(\mathbf{z})) + \ExpectedVal_{q_\phi(\mathbf{z}\vert\mathbf{x^{(i)}})}\big[\log p_\theta(\mathbf{x}^{(i)}\vert \mathbf{z})\big],
\end{align}
$ 
<br><br>
where $D_{KL}$ is the KL-divergence.
<br><br><br><br>
If we assume that $p_\theta(\mathbf{z}) = \mathcal{N}(0, 1) \text{ and } q_\phi(\mathbf{z}\vert \mathbf{x}^{(i)})$ is Gaussian, then the KL-divergence can be integrated analytically and has the value
$    
\begin{align}
\DeclareMathOperator{\log}{\operatorname{log}}
D_{KL}(q_\phi(\mathbf{z} \vert \mathbf{x}^{(i)}) \| p_\theta(\mathbf{z})) = -\frac{1}{2}\sum_{j=1}^J(1+\log((\sigma_j)^2) - (\mu_j)^2 - (\sigma_j)^2),
\end{align}
$
<br>
where $J$ is the dimensionality of the latent vector $z$.
</h2>

In [None]:
# Instantiate the three sub-layers once; later cells reuse these same objects
# (and therefore the trained weights) for the analysis.
encoder = Encoder(latent_dim=LATENT_DIM)
sampler = Sampler()
decoder = Decoder()

# Functional-API wiring: image -> (mu, logvar, sigma) -> sampled z -> reconstruction
x = tf.keras.layers.Input(shape=INPUT_SHAPE)
mu, logvar, sigma = encoder(x)
z = sampler((mu, sigma))
x_recon = decoder(z) 

# Custom loss layer for reconstruction
class ReconstructionLoss(tf.keras.layers.Layer):
    """Registers the reconstruction term of the VAE loss via ``add_loss``.

    ``call`` takes ``[x, x_recon]``, sums the element-wise binary
    cross-entropy over axes 1 and 2, averages the result, adds it as a layer
    loss and returns it.
    """
    def __init__(self, **kwargs):
        self.is_placeholder = True
        super().__init__(**kwargs)

    def call(self, inputs):
        target, reconstruction = inputs[0], inputs[1]
        K = tf.keras.backend

        elementwise = K.binary_crossentropy(target, reconstruction)
        recon_loss = K.mean(K.sum(elementwise, axis=(1,2)))

        self.add_loss(recon_loss, inputs=inputs)

        return recon_loss

# Custom loss layer for kl-loss
class KLLoss(tf.keras.layers.Layer):
    """Registers the beta-weighted KL-divergence term via ``add_loss``.

    Args:
        beta: multiplier applied to the KL term.

    ``call`` takes ``[mu, logvar]``, adds the batch-mean KL term as a layer
    loss and returns it.
    """
    def __init__(self, beta=1.0, **kwargs):
        self.is_placeholder = True
        super().__init__(**kwargs)

        self.beta = beta

    def call(self, inputs):
        mu, logvar = inputs[0], inputs[1]
        K = tf.keras.backend

        # KL Divergence for Gaussian Distributions (see Kingma and Welling, 2014, p.11)
        # --> Assumes that the prior p(z) is normal and the posterior approximation q(z|x) is Gaussian
        per_dimension = 1.0 + logvar - K.square(mu) - K.exp(logvar)
        kl_loss = -0.5 * self.beta * K.sum(per_dimension, axis=-1)
        kl_loss = K.mean(kl_loss)

        self.add_loss(kl_loss, inputs=inputs)

        return kl_loss

# Attach both loss terms to the graph; each layer registers its value through add_loss.
recon_loss = ReconstructionLoss(name='ReconLoss')([x, x_recon])
kl_loss = KLLoss(name='KL', beta=BETA)([mu, logvar])

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

# The model returns three outputs: (reconstruction, reconstruction loss, KL loss).
# loss=None because the training objective comes from the add_loss calls above.
model = tf.keras.Model(inputs = x, outputs = [x_recon, recon_loss, kl_loss])
model.compile(optimizer=optimizer, loss=None)

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
# Render the model graph as a diagram.
tf.keras.utils.plot_model(model)

## Visualize Reconstruction of Sampled Images *PRIOR TO TRAINING*.

### this should look like random noise (if not loading weights from a saved model)

In [None]:
# visualize generation before training
if TRAINING:
    images, _ = next(iter(test_ds))

    # The model returns (reconstruction, recon loss, KL loss); only the reconstruction is shown.
    xs, _, _ = model(images)  # reconstructed images

    # Titles are the positions of the images within the batch.
    display_images(images=xs[0:64], rows=4, cols=16, dpi=196, labels=range(images.shape[0]))

## Train The Model and Display Metrics For Model Performance on **Training** and **Validation** Data Sets

In [None]:
# NOTE(review): autograph verbosity 10 looks like a leftover debugging setting -- confirm it is still wanted.
tf.autograph.set_verbosity(10)
if TRAINING:
    # Stop early when the validation loss has not improved for 3 consecutive epochs.
    early_stop_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

    # train_ds repeats indefinitely, so the number of batches per epoch must be
    # given explicitly. Step counts are integers; np.ceil returns a float, so
    # convert before handing them to Keras.
    history = model.fit(train_ds, 
                        callbacks=[early_stop_callback],
                        epochs=MAX_EPOCHS, 
                        steps_per_epoch=int(np.ceil(n_training/BATCH_SIZE)), 
                        validation_data=val_ds, 
                        validation_steps=int(np.ceil(n_val/BATCH_SIZE)))
    
    plot_history(history, metrics=[('loss', 'val_loss')])

In [None]:
# Saving takes precedence: weights are loaded only when SAVING_WEIGHTS is False
# and LOADING_WEIGHTS is True. Failures are printed rather than raised, so the
# notebook keeps running with whatever weights the model currently has.
if SAVING_WEIGHTS:
    try:
        model.save_weights(SAVE_PATH, save_format='tf')
    except Exception as e:
        print(e)
        
elif LOADING_WEIGHTS:
    try:
        model.load_weights(SAVE_PATH)
    except Exception as e:
        print(e)

## Visualize Reconstruction of Sampled Images *AFTER TRAINING*.

### note: these are images from the **TEST** data set: the auto-encoder was NOT trained on these!

In [None]:
# visualize generation after training
images, _ = next(iter(test_ds))

xs, _, _ = model(images) # reconstructed images (first model output), not latent vectors

# Titles are the positions of the images within the batch.
fig = display_images(xs[0:64], rows=4, cols=16, dpi=196, labels=range(images.shape[0]))

# Comparison between Original Images and Reconstructions

In [None]:
images, _ = next(iter(test_ds))

# Run the sub-layers directly (instead of the full model) to get reconstructions.
mu, _, sigma = encoder(images)
zs = sampler((mu, sigma))
xs = decoder(zs)

# Fixed seed so the same 10 positions are drawn on every run; the uniform
# floats are truncated to ints, so an index can appear more than once.
np.random.seed(8675309)
idxs = np.random.uniform(0, len(images), size=10).astype(int)
# Top row: originals; bottom row: their reconstructions (same column order).
imgs = np.concatenate([images.numpy()[idxs], xs.numpy()[idxs]])

fig = display_images(imgs, rows=2, cols=len(idxs), wspace=0.1, dpi=192)
# fig.savefig('recon.svg', format='svg', dpi=500)

<hr>
<h1> Cosine Similarity Over Learned Gaussians </h1>
<h2>
$
\begin{align}
\text{cosine similarity}(\vec{u}, \vec{v}) \equiv \frac{\vec{u} \cdot \vec{v}}{\| u \| \| v \|}
\end{align}
$
</h2>

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

## Illustration of Cosine Similarity Distance Metric 

In [None]:
# Examples for cosine similarity distance metric

# identical vectors: v is the same array as u, so every entry of the 2x2 result should be 1
u = np.random.rand(100)
v = u

cosine_similarity([u, v])

In [None]:
# orthogonal vectors: the off-diagonal entries of the result should be ~0
u = np.random.rand(100)
v = np.random.rand(100)

# apply Gram-Schmidt to make v orthogonal to u (subtract the projection of v onto u)
v -= v.dot(u) * u / np.linalg.norm(u)**2

cosine_similarity([u, v])

In [None]:
# opposite vectors: v = -u, so the off-diagonal entries of the result should be -1
u = np.random.rand(100)
v = -u

cosine_similarity([u, v])

## Most Similar Exemplars (based on cosine similarity)

In [None]:
# Named integer ids (0-9) for the ten classes; the names presumably follow the
# Fashion MNIST label order -- TODO confirm against the dataset documentation.
TOPS = 0
TROUSERS = 1
PULLOVER = 2
DRESS = 3
COAT = 4
SANDALS = 5
SHIRTS = 6
SNEAKERS = 7
BAGS = 8
ANKLE_BOOTS = 9

In [None]:
n_classes = 10  # number of label values iterated when sampling exemplars
n_samples_per_class = 25  # exemplars drawn per class; later cells map row index -> class via index // n_samples_per_class

## Randomly select test images from each class in Fashion MNIST

In [None]:
# For each class id, draw n_samples_per_class distinct test images with that
# label, normalize them, and concatenate the per-class groups in class order.
# Row i of `imgs` therefore belongs to class i // n_samples_per_class.
np.random.seed(8675309)
imgs = np.concatenate([normalize(xtest[np.random.choice(np.reshape(np.argwhere(ytest==mnist_class), -1), 
                                                        n_samples_per_class, 
                                                        replace=False)])[0] 
                       for mnist_class in range(n_classes)], axis=0)

In [None]:
# One grid row per class: 10 rows of 25 exemplars.
fig = display_images(imgs, rows=10, cols=25, dpi=300)

## Calculate Cosine Similarity Distance Matrix Using Means ($\vec{\mu}$) Of Latent Prob. Distributions

In [None]:
# Encode the exemplars; the similarity analysis below uses the means `mu`.
mu, _, sigma = encoder(imgs)
zs = sampler((mu, sigma))
xs = decoder(zs)

In [None]:
# Pairwise cosine similarity between the latent means. Despite the name, larger
# values in dist_matrix mean MORE similar (1.0 on the diagonal).
dist_matrix = cosine_similarity(mu)
dist_matrix = np.round(dist_matrix, decimals=4)

## Apply A Sigmoidal Activation Function (denoted $\alpha_c$) To Cosine Similarities (denoted $\delta$)

<h3>
Ideally, we want this activation function to have the following properties:

1. if $\delta(x,y)$ is close to 1.0 then $\delta(x,y) \approx \alpha_c(\delta(x,y))$
2. if $\delta(x,y)$ < instantiation threshold then $\alpha_c(\delta(x,y))$ should be dampened towards 0.0
                                                                         
</h3>


In [None]:
def curr_activation(x, a, b):
    """Logistic activation ``1 / (1 + exp(-a*x + b))``.

    Args:
        x: scalar or NumPy array of inputs.
        a: steepness of the curve.
        b: shift term added inside the exponent.

    Returns:
        The activation, with the same shape as ``x``.
    """
    exponent = -a*x+b
    return 1.0 / (1.0 + np.exp(exponent))

# activation function parameters (a = 1.0, b = 0.0 gives standard logistic function)
a = 18.0 # steepness of curve
b = a * 0.666 # horizontal shift: the curve crosses 0.5 where a*x == b, i.e. at x = 0.666

# Plot the activation over the full cosine-similarity range [-1, 1].
# NOTE: this rebinds the global name `x`, which earlier held the Keras Input tensor.
x = np.linspace(-1.0, 1.0, 100)
y = curr_activation(x, a, b) 
plt.plot(x,y)

In [None]:
# Set to False to rank by raw (rounded) cosine similarity instead.
using_activation = True

# Passing the cosine similarity through a sigmoidal activation function
act_matrix = np.copy(dist_matrix)

if using_activation:
    act_matrix = curr_activation(act_matrix, a, b)
    
# Rounding to 4 decimals means distinct similarities can end up with equal activations.
act_matrix = np.round(act_matrix, decimals=4)

## Create a Heatmap Showing $\alpha_c$

In [None]:
# Font sizes for this figure (SMALL_SIZE is defined but not used in this cell).
SMALL_SIZE = 10
MEDIUM_SIZE = 11
BIGGER_SIZE = 12

# These rcParams / rc calls change matplotlib's global settings for the rest of the session.
plt.rcParams['font.family'] ='Times New Roman'

plt.rc('font', size=BIGGER_SIZE)          # controls default text sizes
plt.rc('axes', labelsize=BIGGER_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels

fig = plt.figure()
ax = fig.gca()
im = ax.imshow(act_matrix, cmap='hot', interpolation='gaussian')
# Ticks every len(imgs)/10 entries; the tick labels are hidden below, so the
# ticks only position the grid lines (the class boundaries for 10 equal-sized groups).
ax.set_xticks(np.arange(0,len(imgs)+1,int((len(imgs))/10)))
ax.set_yticks(np.arange(0,len(imgs)+1,int((len(imgs))/10)))

ax.tick_params(axis='both', which='both', length=0)
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.grid(which='major', color='gray', linestyle='-', linewidth=0.25)

cb = plt.colorbar(im)
cb.set_ticks([])
    
plt.show()

# uncomment to export figure
# fig.savefig('curr_act_heatmap.svg', format='svg', dpi=300)

## Find the Most (or Least) Similar Images for a Given Input

In [None]:
def get_n_similar(images, index, act_matrix, n, reversed=False):
    """Rank the entries of one row of ``act_matrix`` and return the first ``n``.

    Args:
        images: not used by the function.
        index: row of ``act_matrix`` to rank.
        act_matrix: 2-D array of activations.
        n: number of ranked entries to return.
        reversed: when False (default) the highest activations come first;
            when True the lowest come first.

    Returns:
        Array of shape (<= n, 2); column 0 is the column index within the row
        (stored as a float), column 1 is the activation. Ties keep their
        original column order.
    """
    def activation_of(pair):
        return pair[1]

    # (column index, activation) pairs for the requested row
    indexed_row = np.array(list(enumerate(act_matrix[index])))

    descending = not reversed
    ranked = sorted(indexed_row, key=activation_of, reverse=descending)

    return np.array(ranked)[:n, :]

In [None]:
n = 10  # number of ranked images shown per reference image
# Reference images: the first exemplar of each class (rows 0, 25, 50, ...).
ref_ndxs = range(0,n_samples_per_class*n_classes, n_samples_per_class)

curr_class = 0  # only used by the commented-out export line below
for ndx in ref_ndxs:
    sims = get_n_similar(imgs, ndx, act_matrix, n)
    
    # Column 0 holds image indices (as floats), column 1 the activations.
    indices = sims[:,0].astype(int)
    dists = list(map(str, np.round(sims[:,1], decimals=2)))
    
    # One row per reference image, each image titled with its activation.
    fig = display_images(imgs[indices], rows=1, cols=len(indices), labels=dists, dpi=100)
    
    # uncomment to export figure
    # fig.savefig('{cls}_distance_from_ref_image.svg'.format(cls=curr_class), format='svg', dpi=100)
    
    curr_class += 1

## K-nearest neighbors (KNN) using latent similarities and activation function

In [None]:
n_exemplars = dist_matrix.shape[0]

# Extra row/column (index n_classes) collects the "unknown" outcome.
conf_matrix = np.zeros(shape=(n_classes + 1, n_classes + 1))
k = 5
threhold = 0.0 # instantiation threshold (name kept as-is)

# Placeholder neighbour whose index maps to the extra "unknown" class.
UNK_VALUE = np.array([[n_exemplars + 1, 0.0]])

for ndx in range(n_exemplars):
    
    # Rank every exemplar, then drop the query itself BY INDEX before taking
    # the top k. Dropping the first ranked entry by position is not reliable:
    # activations are rounded, so another exemplar can tie with the query and
    # be sorted ahead of it, which would leave the query among its own neighbours.
    ranked = get_n_similar(imgs, ndx, act_matrix, act_matrix.shape[1])
    k_similar_set = ranked[ranked[:,0].astype(int) != ndx][:k]
    
    # remove examples below threshold
    k_similar_set = k_similar_set[k_similar_set[:,1] > threhold]
    
    # if all examples below threshold add UNKNOWN index
    if len(k_similar_set) == 0:
        k_similar_set = UNK_VALUE
               
    # calculate the object classes from indices for most (cosine) similar
    obj_classes = k_similar_set[:,0].astype(int) // n_samples_per_class
    
    # majority vote over the neighbours' classes (ties go to the lowest class id)
    predicted_class = np.argmax(np.bincount(obj_classes))
    actual_class = ndx // n_samples_per_class 
                
    # rows: actual class, columns: predicted class
    conf_matrix[actual_class,predicted_class] += 1
        
# Accuracy over the known classes only (the "unknown" row/column is excluded).
print('overall accuracy: ', sum(np.diag(conf_matrix[0:10, 0:10]))/ np.sum(conf_matrix[0:10, 0:10]))

In [None]:
# Font sizes for this figure (SMALL_SIZE is defined but not used in this cell).
SMALL_SIZE = 10
MEDIUM_SIZE = 11
BIGGER_SIZE = 12

plt.rcParams['font.family'] ='Times New Roman'

plt.rc('font', size=BIGGER_SIZE)          # controls default text sizes
plt.rc('axes', labelsize=BIGGER_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels

fig = plt.figure()

# Rows are actual classes, columns predicted classes. The colour scale is
# capped at the number of exemplars per class (the maximum possible cell count).
ax = sns.heatmap(conf_matrix, annot=True, vmax=n_samples_per_class, linewidths=.5, cmap="gist_gray", xticklabels=True, yticklabels=True)
ax.set_xlabel("Object Class (Predicted)")
ax.set_ylabel("Object Class (Actual)")

# fix issue with plot cutting off top and bottom of image
# (dedicated names are used so the activation parameter `b` is not overwritten)
ylim_bottom, ylim_top = ax.get_ylim()
ax.set_ylim(ylim_bottom + 0.5, ylim_top - 0.5)

plt.show()
# fig.savefig('knn_confmatrix.svg', format='svg')

## Comparing (Cosine) Similarity Between $\vec{\mu}$ Of Images and Their Reconstructions

In [None]:
# Encode the exemplars, sample latent vectors and decode them into reconstructions.
mus, _, sigmas = encoder(imgs)
zs = sampler((mus, sigmas))
recons = decoder(zs)

In [None]:
# Re-encode the reconstructions; only their latent means are used below.
recon_mus, _, sigma = encoder(recons)

## For a Single Instance...

In [None]:
def display_image_and_recon(ndx):
    """Show exemplar ``ndx`` next to its reconstruction.

    Reads the notebook-level ``imgs`` and ``recons`` and draws the pair in a
    1x2 grid titled 'original' and 'reconstruction'.

    Args:
        ndx: row index into ``imgs`` / ``recons``.
    """
    # The latent means were previously looked up here but never used, so the
    # function now touches only what it draws.
    display_images([imgs[ndx], recons[ndx]], rows=1, cols=2, dpi=90,
                   labels=['original', 'reconstruction'])

In [None]:
# Show the first exemplar and its reconstruction.
display_image_and_recon(ndx=0)

## Cosine Similarity Matrix Between $\vec{\mu}$ for Images and Their Reconstructions

In [None]:
# Entry [i, j]: cosine similarity between the latent mean of reconstruction i
# and the latent mean of original image j (so the diagonal pairs each image with its own reconstruction).
recon_dist_matrix = cosine_similarity(recon_mus, mus)

### Most and Least Similar (Between Images and Their Reconstructions)

In [None]:
# The diagonal holds each image's similarity to its own reconstruction;
# pick the images with the lowest and highest value.
ndx_least_similar, ndx_most_similar = np.argmin(np.diag(recon_dist_matrix)), np.argmax(np.diag(recon_dist_matrix))

display_image_and_recon(ndx_most_similar)
display_image_and_recon(ndx_least_similar)

## Activation Heatmap Between Reconstructions and Original Images

In [None]:
recon_act_matrix = np.copy(recon_dist_matrix)

# Passing the cosine similarity through a sigmoidal activation function
# NOTE(review): this hard-codes steepness 15.0 and shift 10.0 rather than calling
# curr_activation with the notebook's `a` / `b` (18.0 and 18.0 * 0.666) -- confirm this is intended.
recon_act_matrix = 1.0 / (1.0 + np.exp(-15.0*recon_act_matrix+10.0))
# Rounded to 2 decimals here (the earlier act_matrix uses 4), so equal values are more frequent.
recon_act_matrix = np.round(recon_act_matrix, decimals=2)

In [None]:
# Font sizes for this figure (SMALL_SIZE is defined but not used in this cell).
SMALL_SIZE = 10
MEDIUM_SIZE = 11
BIGGER_SIZE = 12

plt.rcParams['font.family'] ='Times New Roman'

plt.rc('font', size=BIGGER_SIZE)          # controls default text sizes
plt.rc('axes', labelsize=BIGGER_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels

fig = plt.figure()
ax = fig.gca()
im = ax.imshow(recon_act_matrix, cmap='hot', interpolation='gaussian')
# Ticks every len(imgs)/10 entries; labels are hidden, so they only place the grid lines.
ax.set_xticks(np.arange(0,len(imgs)+1,int((len(imgs))/10)))
ax.set_yticks(np.arange(0,len(imgs)+1,int((len(imgs))/10)))

ax.tick_params(axis='both', which='both', length=0)
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.grid(which='major', color='gray', linestyle='-', linewidth=0.25)

cb = plt.colorbar(im)
cb.set_ticks([])
    
plt.show()
# NOTE: unlike the other figure cells, this export is active and writes an SVG to the working directory on every run.
fig.savefig('recon_curr_act_heatmap.svg', format='svg', dpi=300)

In [None]:
# Distribution of the activation between each image and its own reconstruction (the matrix diagonal).
# NOTE(review): distplot is deprecated in recent seaborn releases -- confirm the installed version still provides it.
sns.distplot(np.diag(recon_act_matrix))

In [None]:
n_exemplars = recon_act_matrix.shape[0]

# Extra row/column (index n_classes) collects the "unknown" outcome.
conf_matrix = np.zeros(shape=(n_classes + 1, n_classes + 1))
k = 5
act_threhold = 0.0  # instantiation threshold (name kept as-is)

# Placeholder neighbour whose index maps to the extra "unknown" class.
UNK_VALUE = np.array([[n_exemplars + 1, 0.0]])

for ndx in range(n_exemplars):
    
    # Rank every original image against reconstruction `ndx`, then drop the
    # reconstruction's own source image BY INDEX before taking the top k.
    # Dropping the first ranked entry by position is not reliable: activations
    # are rounded to 2 decimals, so ties are common and another image can be
    # sorted ahead of the self-reference.
    ranked = get_n_similar(imgs, ndx, recon_act_matrix, recon_act_matrix.shape[1])
    k_similar_set = ranked[ranked[:,0].astype(int) != ndx][:k]
    
    # remove examples below threshold
    k_similar_set = k_similar_set[k_similar_set[:,1] > act_threhold]
    
    # if all examples below threshold add UNKNOWN index
    if len(k_similar_set) == 0:
        k_similar_set = UNK_VALUE
               
    # calculate the object classes from indices for most (cosine) similar
    obj_classes = k_similar_set[:,0].astype(int) // n_samples_per_class
    
    # majority vote over the neighbours' classes (ties go to the lowest class id)
    predicted_class = np.argmax(np.bincount(obj_classes))
    actual_class = ndx // n_samples_per_class 
                
    # rows: actual class, columns: predicted class
    conf_matrix[actual_class,predicted_class] += 1
        
# Accuracy over the known classes only (the "unknown" row/column is excluded).
print('overall accuracy: ', sum(np.diag(conf_matrix[0:10, 0:10]))/ np.sum(conf_matrix[0:10, 0:10]))

In [None]:
# Font sizes for this figure (SMALL_SIZE is defined but not used in this cell).
SMALL_SIZE = 10
MEDIUM_SIZE = 11
BIGGER_SIZE = 12

plt.rcParams['font.family'] ='Times New Roman'

plt.rc('font', size=BIGGER_SIZE)          # controls default text sizes
plt.rc('axes', labelsize=BIGGER_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=MEDIUM_SIZE)    # fontsize of the tick labels

fig = plt.figure()

# conf_matrix is indexed [actual, predicted], so rows (y) are actual classes
# and columns (x) predicted classes; the axis labels say so explicitly.
# The colour scale is capped at the number of exemplars per class.
ax = sns.heatmap(conf_matrix, annot=True, vmax=n_samples_per_class, linewidths=.5, cmap="gist_gray", xticklabels=True, yticklabels=True)
ax.set_xlabel("Object Class (Predicted)")
ax.set_ylabel("Object Class (Actual)")

# fix issue with plot cutting off top and bottom of image
# (dedicated names are used so the activation parameter `b` is not overwritten)
ylim_bottom, ylim_top = ax.get_ylim()
ax.set_ylim(ylim_bottom + 0.5, ylim_top - 0.5)

plt.show()

# uncomment to export image
# fig.savefig('knn_confmatrix.svg', format='svg')