# Loading and Verifying Previously Normalized Image Data

https://becominghuman.ai/understand-and-apply-capsnet-on-traffic-sign-classification-a592e2d4a4ea

https://www.zhihu.com/question/67287444

https://zhuanlan.zhihu.com/p/29435406

https://github.com/naturomics/CapsNet-Tensorflow

https://github.com/XifengGuo/CapsNet-Keras

In [None]:
import pickle
import csv
import numpy as np

# Pickle file that holds the pre-normalized train / validation / test splits.
AugumentedDataSetName='TrafficSignNormalized_0.5.pickle'
# NOTE(review): pickle.load can execute arbitrary code; only load files from a trusted source.
with open(AugumentedDataSetName, mode='rb') as f:
    AugumentedData = pickle.load(f)
    
X_train, y_train = AugumentedData['X_train'], AugumentedData['y_train']
X_valid, y_valid = AugumentedData['X_valid'], AugumentedData['y_valid']
X_test, y_test = AugumentedData['X_test'], AugumentedData['y_test']
# Drop the reference to the loaded dictionary once the splits are unpacked.
AugumentedData=None

# Number of training examples
n_train = len(X_train)

# Number of validation examples
n_validation = len(X_valid)

# Number of testing examples
n_test = len(X_test)

# Per-image shape, taken from axes 1..3 of X_train.
# NOTE(review): axis 1 is named "width" and axis 2 "height" here, whereas
# CreateDataSummary treats the first image axis as height. This is harmless
# for square images; confirm the array layout before using non-square data.
image_shape = (X_train.shape)[1:4]
image_width = image_shape[0]
image_height = image_shape[1]
image_channel = image_shape[2]

# Number of distinct label values found in the training labels.
n_classes = len(np.unique(y_train))

print("Number of training examples =", n_train)
print("Number of validation examples =", n_validation)
print("Number of testing examples =", n_test)
print("Image data shape =", image_width,image_height,image_channel)
print("Number of classes =", n_classes)

# Shift every pixel value by +0.5, in place.
# presumably the stored data is centred on zero and this maps it into [0, 1],
# the range of the sigmoid decoder output — TODO confirm against the
# normalization step that produced the pickle.
X_train+=0.5
X_valid+=0.5
X_test+=0.5

In [None]:
from sklearn.utils import shuffle
# Shuffle images and labels with the same permutation so each image keeps its label.
X_train, y_train = shuffle(X_train, y_train)

In [None]:
### Data exploration visualization code goes here.
### Feel free to use as many code cells as needed.
import matplotlib.pyplot as plt
import cv2
# Visualizations will be shown in the notebook.
%matplotlib inline

def GetLabelImage(index): 
    """Return the tile for class `index` cut out of the sign-name sheet.

    The sheet ``./Data/signnames_all.jpg`` is treated as a grid with 7 tiles
    per row: tile ``index`` sits at row ``index // 7`` and column
    ``index % 7``. The tile is rescaled by ``32/dX`` and ``32/dY`` (so to
    roughly 32x32 pixels) and converted to RGB floats in [0, 1].

    Args:
        index: integer class index selecting the tile.

    Returns:
        A float array (height, width, 3) with values in [0, 1].

    Raises:
        FileNotFoundError: if the sheet image cannot be read.
    """
    path="./Data"
    sheetPath = path+'/signnames_all.jpg'
    image=cv2.imread(sheetPath,1)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque error inside cvtColor.
    if image is None:
        raise FileNotFoundError("Could not read label sheet image: " + sheetPath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    H, W, _ = image.shape
    # Size of one grid cell in pixels; the 7.105 divisor is kept from the
    # original code (presumably compensates for a margin in the sheet).
    dY = H/7.
    dX = W/7.105
    y = index//7
    x = index%7 
    labelImage = image[round(y*dY):round(y*dY+dY), round(x*dX):round(x*dX+dX),:]
    labelImage = cv2.resize(labelImage, (0,0), fx=32./dX, fy=32./dY,)
    labelImage = labelImage.astype(float)/255
    return labelImage

def InsertSubimage(image, subImage, y, x):
    """Paste `subImage` into `image` with its top-left corner at row `y`, column `x`.

    `image` is modified in place and also returned. `subImage` must be a
    3-D array (rows, columns, channels).
    """
    rows, cols, _channels = subImage.shape
    target = (slice(y, y + rows), slice(x, x + cols), slice(None))
    image[target] = subImage
    return image

def CreateDataSummary(images, labels, sampleNumber, multiplier):
    """Render a per-class summary image of a labelled image set.

    The canvas is a float32 image filled with 1.0, one column of tiles per
    class index (0 .. max label) and `sampleNumber` tile rows. For every
    label present it holds, measured from the bottom edge: the percentage
    text, the label tile from GetLabelImage, the class mean image, and a
    stack of randomly drawn samples whose height grows with the class share.

    Args:
        images: array indexed as images[i] -> (height, width, channels).
        labels: array of class labels, one per image.
        sampleNumber: number of tile rows in the canvas.
        multiplier: scales how many samples are drawn per class
            (int(sampleNumber * multiplier * class_share)).

    Returns:
        The float32 canvas.
    """
    imageShape = (images.shape)[1:4]
    all_labels=np.unique(labels).astype(int)
    # Columns are indexed by label value, so the width covers 0..max label.
    classNumber = np.amax(all_labels)+1
    subimageWidth=imageShape[1]
    subimageHeight=imageShape[0];
    resultImageWidth=subimageWidth*(classNumber);
    resultImageHeight=subimageHeight*sampleNumber;
    resultImageChanel=imageShape[2];
    resultImage = 1.0*np.ones(shape=(resultImageHeight,resultImageWidth,resultImageChanel),dtype=np.float32)
    cv2.putText(resultImage, "Training data histogram", (int(subimageWidth*0.01), int(resultImageHeight*0.05)),cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,0),2)
    for index in all_labels:
        index=int(index)
        # Label tile goes two tile-heights above the bottom edge.
        labelImage = GetLabelImage(index)
        InsertSubimage(resultImage, labelImage, resultImageHeight-subimageHeight*2, subimageWidth*index)
        indexList = list(np.where(labels== index)[0])
        count=len(indexList)
        if(count>0):
            # Class mean image goes three tile-heights above the bottom edge.
            meanImage = np.average(images[indexList], axis=0)
            InsertSubimage(resultImage, meanImage, resultImageHeight-subimageHeight*3, subimageWidth*index)
        totalDisplaySample=sampleNumber*multiplier;
        percentage = float(count)/float(len(labels))
        numberDisplaySample=int(totalDisplaySample*count/len(labels))
        for i in range(numberDisplaySample):
            sampleImage=images[np.random.choice(indexList)]
            # Stop before the stack gets within four tiles of the top edge.
            if(resultImageHeight-subimageHeight*(7+i)<0):
                break
            else:
                # NOTE(review): for i == 0 this is the same offset as the mean
                # image above, so the first sample overwrites the mean tile.
                # Confirm whether (4+i) was intended.
                InsertSubimage(resultImage, sampleImage, resultImageHeight-subimageHeight*(3+i), subimageWidth*index)
        cv2.putText(resultImage, '{:.0%}'.format(percentage), (int(subimageWidth*index), int(resultImageHeight-subimageHeight*0.5)),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,0),2)
    # Horizontal separator three tile-heights above the bottom edge.
    cv2.line(resultImage,(0,resultImageHeight-subimageHeight*3),(resultImageWidth,resultImageHeight-subimageHeight*3),(0,0,0.5),3)
    #cv2.imwrite(path+'/train_data_summary.jpg',cv2.cvtColor(resultImage, cv2.COLOR_BGR2RGB))
    return resultImage

In [None]:
#print("Origional Dataset:", n_train,"Entire Dataset:",len(X_train))
#resultImage=CreateDataSummary(X_train,y_train,sampleNumber=30, multiplier=64)
#plt.rcParams["figure.figsize"] = (20,30)
#plt.imshow(resultImage)
#plt.axis('off') 
#plt.show()

# Build Capsnet Model

## Setup the Environment

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import numpy as np
import tensorflow as tf

# Start from an empty default graph so that re-running the notebook does not
# pile duplicate ops onto the previous graph.
tf.reset_default_graph()

# Fix the NumPy seed and the TensorFlow graph-level seed.
np.random.seed(42)
tf.set_random_seed(42)

## Input Images
Let's start by creating a placeholder for the input images

In [None]:
# Input batch placeholder: [batch, image_width, image_height, image_channel];
# the batch dimension is left open.
X = tf.placeholder(shape=[None, image_width, image_height, image_channel], dtype=tf.float32, name="X")
print("image_width=",image_width)
print("image_height=",image_height)

## ReLu Conv1
The first layer is a standard convolutional layer

In [None]:
# Hyper-parameters of the first (plain) convolutional layer.
conv1_params = {
    "filters": 256,   # configurable
    "kernel_size": 9, # configurable
    "strides": 1,     # configurable
    "padding": "valid",
    "activation": tf.nn.relu,
}
conv1_raw = tf.layers.conv2d(X, name="conv1", **conv1_params)
# Dropout keep probability. The placeholder has no default, so it must be fed
# on every run that evaluates anything downstream of conv1.
conv1_keep_prob = tf.placeholder(tf.float32)
conv1 = tf.nn.dropout(conv1_raw, conv1_keep_prob)  

In [None]:
# Display the conv1 tensor (name, shape, dtype) as the cell output.
conv1

## Primary Capsules

In [None]:
def capsule_layer(input_layer,kernel_size=6,strides=2,
                  caps1_n_maps=16,caps1_n_dims=5,
                 caps2_n_caps=43,caps2_n_dims=32):
    """Build the primary-capsule and output-capsule layers on `input_layer`.

    Args:
        input_layer: 4-D feature-map tensor [batch, height, width, channels].
        kernel_size: kernel size of the primary-capsule convolution.
        strides: stride of the primary-capsule convolution.
        caps1_n_maps: number of primary-capsule feature maps.
        caps1_n_dims: dimension of each primary-capsule vector.
        caps2_n_caps: number of output capsules.
        caps2_n_dims: dimension of each output-capsule vector.

    Returns:
        Tensor of shape [batch, 1, caps2_n_caps, caps2_n_dims, 1] holding the
        output-capsule vectors after two rounds of routing by agreement.
    """

    def squash(s, axis=-1, epsilon=1e-7, name=None):
        """Scale `s` along `axis` to length ||s||^2 / (1 + ||s||^2), keeping its direction."""
        with tf.name_scope(name, default_name="squash"):
            squared_norm = tf.reduce_sum(tf.square(s), axis=axis,
                                         keep_dims=True)
            # epsilon keeps the square root (and its gradient) finite at zero.
            safe_norm = tf.sqrt(squared_norm + epsilon)
            squash_factor = squared_norm / (1. + squared_norm)
            unit_vector = s / safe_norm
            return squash_factor * unit_vector

    def primary_capsule(input_layer,kernel_size,strides,caps1_n_maps,caps1_n_dims):
        """Convolve, regroup the channels into capsule vectors, and squash them."""
        conv2_params = {
            "filters": caps1_n_maps * caps1_n_dims, # one filter per capsule component
            "kernel_size": kernel_size, # configurable
            "strides": strides,     # configurable
            "padding": "valid",
            "activation": tf.nn.relu
        }

        conv2 = tf.layers.conv2d(input_layer, **conv2_params)
        # The capsule count must come from the convolution OUTPUT grid: with
        # "valid" padding and stride > 1 it is smaller than the input grid,
        # and using the input size makes the reshape below inconsistent.
        output_height = int(conv2.shape[1])
        output_width = int(conv2.shape[2])
        caps1_n_caps = caps1_n_maps * output_width * output_height  # primary capsule count
        print(caps1_n_caps)
        caps1_raw = tf.reshape(conv2, [-1, caps1_n_caps, caps1_n_dims])
        caps1_output = squash(caps1_raw, )
        return caps1_output

    def digit_capsule(primary_layer,caps2_n_caps=43,caps2_n_dims=32):
        """Predict output capsules from primary capsules and route twice."""
        init_sigma = 0.01
        batch_size = tf.shape(primary_layer)[0]
        caps1_n_caps = (int)(primary_layer.shape[1])
        # Read the primary-capsule dimension from the tensor itself rather
        # than relying on the enclosing function's argument.
        caps1_n_dims = (int)(primary_layer.shape[2])

        # One transformation matrix per (primary capsule, output capsule) pair.
        W_init = tf.random_normal(
        shape=(1, caps1_n_caps, caps2_n_caps, caps2_n_dims, caps1_n_dims),
                stddev=init_sigma, dtype=tf.float32)
        W = tf.Variable(W_init)

        W_tiled = tf.tile(W, [batch_size, 1, 1, 1, 1])

        # Turn each primary vector into a column and repeat it for every
        # output capsule so a single batched matmul yields all predictions:
        # [batch, caps1_n_caps, caps2_n_caps, caps2_n_dims, 1].
        caps1_output_expanded = tf.expand_dims(primary_layer, -1)
        caps1_output_tile = tf.expand_dims(caps1_output_expanded, 2)
        caps1_output_tiled = tf.tile(caps1_output_tile, [1, 1, caps2_n_caps, 1, 1])
        caps2_predicted = tf.matmul(W_tiled, caps1_output_tiled)

        # Routing by agreement
        raw_weights = tf.zeros([batch_size, caps1_n_caps, caps2_n_caps, 1, 1],dtype=np.float32)
        # Round 1: zero logits, i.e. uniform routing weights.
        routing_weights = tf.nn.softmax(raw_weights, dim=2)
        weighted_predictions = tf.multiply(routing_weights, caps2_predicted)
        weighted_sum = tf.reduce_sum(weighted_predictions, axis=1, keep_dims=True)
        caps2_output_round_1 = squash(weighted_sum, axis=-2)
        # Round 2: raise the logits by the dot product between each prediction
        # and the round-1 output, then recompute the outputs.
        caps2_output_round_1_tiled = tf.tile(caps2_output_round_1, [1, caps1_n_caps, 1, 1, 1])
        agreement = tf.matmul(caps2_predicted, caps2_output_round_1_tiled, transpose_a=True)
        raw_weights_round_2 = tf.add(raw_weights, agreement)

        routing_weights_round_2 = tf.nn.softmax(raw_weights_round_2,dim=2)
        weighted_predictions_round_2 = tf.multiply(routing_weights_round_2,caps2_predicted)
        weighted_sum_round_2 = tf.reduce_sum(weighted_predictions_round_2,axis=1, keep_dims=True)
        caps2_output_round_2 = squash(weighted_sum_round_2,axis=-2)
        return caps2_output_round_2

    primary_layer = primary_capsule(input_layer,kernel_size,strides,caps1_n_maps,caps1_n_dims)
    print(primary_layer)
    digit_layer = digit_capsule(primary_layer,caps2_n_caps,caps2_n_dims)
    return digit_layer

In [None]:
# Output-capsule hyper-parameters. They are named at module level because the
# loss, mask, decoder and pose-tweaking cells read `caps2_n_caps` and
# `caps2_n_dims` directly; without these assignments those cells raise NameError.
caps2_n_caps = 43  # one output capsule per class; must equal n_classes for the margin-loss reshape
caps2_n_dims = 32  # length of each output-capsule vector
caps2_output = capsule_layer(conv1,kernel_size=6,strides=2,caps1_n_maps=16,caps1_n_dims=5,
                 caps2_n_caps=caps2_n_caps,caps2_n_dims=caps2_n_dims)

# Estimated Class Probabilities (Length)

In [None]:
def safe_norm(s, axis=-1, epsilon=1e-7, keep_dims=False, name=None):
    """Euclidean norm of `s` along `axis`, with `epsilon` added under the root.

    The epsilon term keeps the square root (and its gradient) finite when
    the vector is all zeros.
    """
    with tf.name_scope(name, default_name="safe_norm"):
        sum_of_squares = tf.reduce_sum(
            tf.square(s), axis=axis, keep_dims=keep_dims)
        stabilized = sum_of_squares + epsilon
        return tf.sqrt(stabilized)

In [None]:
# Length of every output-capsule vector (norm taken over axis -2).
y_proba = safe_norm(caps2_output, axis=-2, name="y_proba")

# Index of the longest capsule along axis 2.
# NOTE(review): this op reuses name="y_proba", and the same assignment is
# repeated in the following cell, which overwrites this Python variable.
y_proba_argmax = tf.argmax(y_proba, axis=2, name="y_proba")

In [None]:
# Index of the longest output capsule along axis 2 (the capsule axis).
y_proba_argmax = tf.argmax(y_proba, axis=2, name="y_proba")
print(y_proba_argmax)

In [None]:
# Remove the two size-1 axes so the prediction is a 1-D vector, one class index per instance.
y_pred = tf.squeeze(y_proba_argmax, axis=[1,2], name="y_pred")
print(y_pred)

Okay, we are now ready to define the training operations, starting with the losses.

# Labels

First, we will need a placeholder for the labels:

In [None]:
# Integer class labels, one per instance (int64 to match the dtype of tf.argmax / y_pred).
y = tf.placeholder(shape=[None], dtype=tf.int64, name="y")

# Margin loss

The paper uses a special margin loss to make it possible to detect two or more different digits in each image:

$ L_k = T_k \max(0, m^{+} - \|\mathbf{v}_k\|)^2 + \lambda (1 - T_k) \max(0, \|\mathbf{v}_k\| - m^{-})^2$

* $T_k$ is equal to 1 if the digit of class $k$ is present, or 0 otherwise.
* In the paper, $m^{+} = 0.9$, $m^{-} = 0.1$ and $\lambda = 0.5$.
* Note that there was an error in the video (at 15:47): the max operations are squared, not the norms. Sorry about that.

In [None]:
# Margin-loss constants: upper margin, lower margin, and the weight of the absent-class term.
m_plus = 0.9
m_minus = 0.1
lambda_ = 0.5

Since `y` will contain the class indices, from 0 to `caps2_n_caps - 1` (0 to 42 here), to get $T_k$ for every instance and every class, we can just use the `tf.one_hot()` function:

In [None]:
# One-hot targets: T[i, k] is 1.0 when instance i has label k, else 0.0.
T = tf.one_hot(y, depth=caps2_n_caps, name="T")

In [None]:
# Display the output-capsule tensor to check its shape.
caps2_output

The 32D output vectors (`caps2_n_dims = 32`) are in the second to last dimension, so let's use the `safe_norm()` function with `axis=-2`:

In [None]:
# Capsule lengths, keeping the reduced axis so the tensor stays 5-D.
caps2_output_norm = safe_norm(caps2_output, axis=-2, keep_dims=True, name="caps2_output_norm")

Now let's compute $\max(0, m^{+} - \|\mathbf{v}_k\|)^2$, and reshape the result to get a simple matrix of shape (_batch size_, `n_classes`):

In [None]:
# max(0, m_plus - ||v_k||)^2 for every capsule ...
present_error_raw = tf.square(tf.maximum(0., m_plus - caps2_output_norm),
                              name="present_error_raw")
# ... flattened to (batch, n_classes); this requires n_classes to equal the
# number of output capsules.
present_error = tf.reshape(present_error_raw, shape=(-1, n_classes),
                           name="present_error")

Next let's compute $\max(0, \|\mathbf{v}_k\| - m^{-})^2$ and reshape it:

In [None]:
# max(0, ||v_k|| - m_minus)^2 for every capsule ...
absent_error_raw = tf.square(tf.maximum(0., caps2_output_norm - m_minus),
                             name="absent_error_raw")
# ... flattened to (batch, n_classes), same layout as present_error.
absent_error = tf.reshape(absent_error_raw, shape=(-1, n_classes),
                          name="absent_error")

We are ready to compute the loss for each instance and each digit:

In [None]:
# Per-instance, per-class margin loss: present term for the true class plus
# the lambda_-weighted absent term for all other classes.
L = tf.add(T * present_error, lambda_ * (1.0 - T) * absent_error, name="L")

Now we can sum the per-class losses for each instance ($L_0 + L_1 + \cdots + L_{K-1}$, where $K$ is the number of classes), and compute the mean over all instances. This gives us the final margin loss:

In [None]:
# Sum over classes (axis 1), then average over the batch.
margin_loss = tf.reduce_mean(tf.reduce_sum(L, axis=1), name="margin_loss")

# Reconstruction

Now let's add a decoder network on top of the capsule network. It is a regular 3-layer fully connected neural network which will learn to reconstruct the input images based on the output of the capsule network. This will force the capsule network to preserve all the information required to reconstruct the digits, across the whole network. This constraint regularizes the model: it reduces the risk of overfitting the training set, and it helps generalize to new digits.

## Mask

The paper mentions that during training, instead of sending all the outputs of the capsule network to the decoder network, we must send only the output vector of the capsule that corresponds to the target digit. All the other output vectors must be masked out. At inference time, we must mask all output vectors except for the longest one, i.e., the one that corresponds to the predicted digit. You can see this in the paper's figure 2 (at 18:15 in the video): all output vectors are masked out, except for the reconstruction target's output vector.

We need a placeholder to tell TensorFlow whether we want to mask the output vectors based on the labels (`True`) or on the predictions (`False`, the default):

In [None]:
# Scalar boolean switch; defaults to False (mask by prediction) when not fed.
mask_with_labels = tf.placeholder_with_default(False, shape=(), name="mask_with_labels")

Now let's use `tf.cond()` to define the reconstruction targets as the labels `y` if `mask_with_labels` is `True`, or `y_pred` otherwise.

In [None]:
# Class index used to build the decoder mask: the true label when
# mask_with_labels is True, otherwise the predicted class.
reconstruction_targets = tf.cond(mask_with_labels, # condition
                                 lambda: y,        # if True
                                 lambda: y_pred,   # if False
                                 name="reconstruction_targets")

Note that the `tf.cond()` function expects the if-True and if-False tensors to be passed _via_ functions: these functions will be called just once during the graph construction phase (not during the execution phase), similar to `tf.while_loop()`. This allows TensorFlow to add the necessary operations to handle the conditional evaluation of the if-True or if-False tensors. However, in our case, the tensors `y` and `y_pred` are already created by the time we call `tf.cond()`, so unfortunately TensorFlow will consider both `y` and `y_pred` to be dependencies of the `reconstruction_targets` tensor. The `reconstruction_targets` tensor will end up with the correct value, but:
1. whenever we evaluate a tensor that depends on `reconstruction_targets`, the `y_pred` tensor will be evaluated (even if `mask_with_labels` is `True`). This is not a big deal because computing `y_pred` adds no computing overhead during training, since we need it anyway to compute the margin loss. And during testing, if we are doing classification, we won't need reconstructions, so `reconstruction_targets` won't be evaluated at all.
2. we will always need to feed a value for the `y` placeholder (even if `mask_with_labels` is `False`). This is a bit annoying, but we can pass an empty array, because TensorFlow won't use it anyway (it just does not know it yet when it checks for dependencies).

Now that we have the reconstruction targets, let's create the reconstruction mask. It should be equal to 1.0 for the target class, and 0.0 for the other classes, for each instance. For this we can just use the `tf.one_hot()` function:

In [None]:
# One-hot mask over the output capsules: 1.0 for the target class, 0.0 elsewhere.
reconstruction_mask = tf.one_hot(reconstruction_targets,
                                 depth=caps2_n_caps,
                                 name="reconstruction_mask")
print(reconstruction_mask)

Let's compare this to the shape of `caps2_output`:

In [None]:
# Display the output-capsule tensor to compare its shape with the mask.
caps2_output

Mmh, its shape is (batch size, 1, 43, 32, 1), i.e. (batch size, 1, `caps2_n_caps`, `caps2_n_dims`, 1). We want to multiply it by the reconstruction_mask, but the shape of the reconstruction_mask is (batch size, 43). We must reshape it to (batch size, 1, 43, 1, 1) to make multiplication possible:

In [None]:
# Insert size-1 axes so the mask broadcasts against the 5-D caps2_output.
reconstruction_mask_reshaped = tf.reshape(reconstruction_mask, [-1, 1, caps2_n_caps, 1, 1]
                                          ,name="reconstruction_mask_reshaped")
print(reconstruction_mask_reshaped)

At last! We can apply the mask:

In [None]:
# Zero every capsule vector except the one selected by the mask.
caps2_output_masked = tf.multiply(
    caps2_output, reconstruction_mask_reshaped,
    name="caps2_output_masked")
print(caps2_output_masked)

One last reshape operation to flatten the decoder's inputs:

In [None]:
# Flatten all (masked) capsule vectors into one feature vector per instance.
decoder_input = tf.reshape(caps2_output_masked,
                           [-1, caps2_n_caps * caps2_n_dims],
                           name="decoder_input")
print(decoder_input)

## Decoder

Now let's build the decoder. It's quite simple: two dense (fully connected) ReLU layers followed by a dense output sigmoid layer:

In [None]:
# Decoder layer widths.
n_hidden1 = 512 # configurable
n_hidden2 = 1024 # configurable
# One output unit per pixel value of the input image.
n_output = image_width * image_height * image_channel

In [None]:
# Decoder: two ReLU dense layers followed by a sigmoid output layer, so the
# reconstructed pixel values lie in (0, 1).
with tf.name_scope("decoder"):
    hidden1 = tf.layers.dense(decoder_input, n_hidden1,
                              activation=tf.nn.relu,
                              name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2,
                              activation=tf.nn.relu,
                              name="hidden2")
    decoder_output = tf.layers.dense(hidden2, n_output,
                                     activation=tf.nn.sigmoid,
                                     name="decoder_output")

## Reconstruction Loss

Now let's compute the reconstruction loss. It is just the squared difference between the input image and the reconstructed image:

In [None]:
# Flatten the input images to the decoder's output layout.
X_flat = tf.reshape(X, [-1, n_output], name="X_flat")
squared_difference = tf.square(X_flat - decoder_output,
                               name="squared_difference")
# NOTE: no axis is given, so the sum runs over every pixel AND every instance
# in the batch; the value therefore scales with the batch size.
reconstruction_loss = tf.reduce_sum(squared_difference,
                                    name="reconstruction_loss")

## Final Loss

The final loss is the sum of the margin loss and the reconstruction loss (scaled down by a factor of 0.0005 to ensure the margin loss dominates training):

In [None]:
# Weight of the reconstruction term in the total loss.
alpha = 0.0005

# Total training loss: margin loss plus the down-weighted reconstruction loss.
loss = tf.add(margin_loss, alpha * reconstruction_loss, name="loss")

# Final Touches

## Accuracy

To measure our model's accuracy, we need to count the number of instances that are properly classified. For this, we can simply compare `y` and `y_pred`, convert the boolean value to a float32 (0.0 for False, 1.0 for True), and compute the mean over all the instances:

In [None]:
# Per-instance hit/miss, then the fraction of hits in the batch.
correct = tf.equal(y, y_pred, name="correct")
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

## Training Operations

The paper mentions that the authors used the Adam optimizer with TensorFlow's default parameters:

In [None]:
# The learning rate is a placeholder, so it must be fed on every training step.
learning_rate = tf.placeholder(tf.float32,name="learning_rate")
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss, name="training_op")

## Init and Saver

And let's add the usual variable initializer, as well as a `Saver`:

In [None]:
# Created after the optimizer so that the optimizer's own variables are
# covered by both the initializer and the saver.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

And... we're done with the construction phase! Please take a moment to celebrate. :)

# Trainable Parameters

In [None]:
# Count the trainable parameters: for each variable multiply its dimension
# sizes together, then sum over all variables. Intermediate values are printed.
total_parameters = 0
for variable in tf.trainable_variables():
    # shape is an array of tf.Dimension
    shape = variable.get_shape()
    print(shape)
    print(len(shape))
    variable_parameters = 1
    for dim in shape:
        print(dim)
        variable_parameters *= dim.value
    print(variable_parameters)
    total_parameters += variable_parameters
print(total_parameters)

# Training

Training our capsule network is pretty standard. For simplicity, we won't do any fancy hyperparameter tuning; the only extra regularization is dropout on the first convolutional layer (controlled by `conv1_keep_prob`). We will just run the training operation over and over again, displaying the loss, and at the end of each epoch, measure the accuracy on the validation set, display it, and save the model if the validation loss is the lowest seen so far (this is a basic way to implement early stopping, without actually stopping). Hopefully the code should be self-explanatory, but here are a few details to note:
* if a checkpoint file exists, it will be restored (this makes it possible to interrupt training, then restart it later from the last checkpoint),
* we must not forget to feed `mask_with_labels=True` during training,
* during testing, we let `mask_with_labels` default to `False` (but we still feed the labels since they are required to compute the accuracy),
* the image batches are sliced from `X_train` and reshaped to \[-1, `image_width`, `image_height`, `image_channel`\] so that they match the input placeholder `X`,
* we evaluate the model's loss and accuracy on the first `num_valid_examples` validation instances. To view progress and support systems that don't have a lot of RAM, the code evaluates the loss and accuracy on one batch at a time, and computes the mean loss and mean accuracy at the end.

*Warning*: if you don't have a GPU, training will take a very long time (at least a few hours). With a GPU, it should take just a few minutes per epoch (e.g., 6 minutes on an NVidia GeForce GTX 1080Ti).

In [None]:
# Training hyper-parameters.
n_epochs = 3
batch_size = 10
keep_prob = 0.7          # dropout keep probability fed to conv1_keep_prob while training
learn_rate =0.0001
restore_checkpoint = True
# NOTE: only 10 training and 10 validation examples are used here; the full
# dataset sizes are left in the trailing comments.
num_train_examples = 10#len(X_train)
num_valid_examples = 10#len(X_valid)

# Batch counts used for the progress display.
n_iterations_per_epoch = num_train_examples // batch_size
n_iterations_validation = num_valid_examples // batch_size

# Lowest validation loss seen so far; starts at infinity.
best_loss_val = np.infty
checkpoint_path = "./Capsnet_Model_FC_Rev3_OrigionalRecon/my_capsule_network"

In [None]:

import os

# saver.save() does not create missing directories, so make sure the
# checkpoint's parent directory exists before the first save.
checkpoint_dir = os.path.dirname(checkpoint_path)
if checkpoint_dir:
    os.makedirs(checkpoint_dir, exist_ok=True)

with tf.Session() as sess:
    # Resume from the last checkpoint when allowed and available, otherwise
    # start from freshly initialized variables.
    if restore_checkpoint and tf.train.checkpoint_exists(checkpoint_path):
        saver.restore(sess, checkpoint_path)
    else:
        init.run()
    
    num_examples = len(X_train)
    
    for epoch in range(n_epochs):
        # Reshuffle every epoch so batches differ between epochs.
        X_train, y_train = shuffle(X_train, y_train)
        for offset in range(0, num_train_examples, batch_size):
            end = offset + batch_size
            batch_x, batch_y = X_train[offset:end], y_train[offset:end]
            # Run the training operation and measure the loss.
            # mask_with_labels=True makes the decoder reconstruct from the
            # true class; dropout is active with keep_prob.
            _, loss_train = sess.run([training_op, loss],
                feed_dict={learning_rate:learn_rate,
                            X: batch_x.reshape([-1, image_width, image_height, image_channel]),
                           y: batch_y,
                           mask_with_labels: True,
                           conv1_keep_prob: keep_prob})
            iteration=offset//batch_size
            print("\rIteration: {}/{} ({:.1f}%)  Loss: {:.5f}".format(
                      iteration, n_iterations_per_epoch,
                      iteration * 100 / n_iterations_per_epoch,
                      loss_train), end="")

        # At the end of each epoch,
        # measure the validation loss and accuracy:
        
        loss_vals = []
        acc_vals = []
        for offset in range(0, num_valid_examples, batch_size):
            end = offset + batch_size
            X_batch, y_batch = X_valid[offset:end], y_valid[offset:end]
            
            # mask_with_labels is left at its default (False) and dropout is
            # disabled by feeding a keep probability of 1.0.
            loss_val, acc_val = sess.run([loss, accuracy],
                    feed_dict={X: X_batch.reshape([-1, image_width, image_height, image_channel]),
                               y: y_batch,
                               conv1_keep_prob: 1.0})
            
            loss_vals.append(loss_val)
            acc_vals.append(acc_val)
            iteration=offset//batch_size
            print("\rEvaluating the model: {}/{} ({:.1f}%)".format(
                      iteration, n_iterations_validation,
                      iteration * 100 / n_iterations_validation),
                  end=" " * 10)
        # Mean of the per-batch values.
        loss_val = np.mean(loss_vals)
        acc_val = np.mean(acc_vals)
        print("\rEpoch: {}  Val accuracy: {:.4f}%  Loss: {:.6f}{}".format(
            epoch + 1, acc_val * 100, loss_val,
            " (improved)" if loss_val < best_loss_val else ""))

        # And save the model if it improved:
        
        if loss_val < best_loss_val:
            save_path = saver.save(sess, checkpoint_path)
            best_loss_val = loss_val
        

# Evaluation

In [None]:
# Evaluation settings. NOTE: only the first 10 test examples are evaluated.
batch_size = 10
num_test_examples = 10 #len(X_test)//400
n_iterations_test = num_test_examples // batch_size

with tf.Session() as sess:
    # Load the weights written by the training cell.
    saver.restore(sess, checkpoint_path)
    loss_tests = []
    acc_tests = []
    
    for offset in range(0, num_test_examples, batch_size):
        end = offset + batch_size
        X_batch, y_batch = X_test[offset:end], y_test[offset:end]
        # Dropout is disabled (keep probability 1.0); mask_with_labels keeps
        # its default of False.
        loss_test, acc_test = sess.run([loss, accuracy],
                feed_dict={X: X_batch.reshape([-1, image_width, image_height, image_channel]),
                           y: y_batch,
                           conv1_keep_prob: 1.0})
        loss_tests.append(loss_test)
        acc_tests.append(acc_test)
        #print("loss",loss_test,"accuracy",acc_test)
        iteration=offset//batch_size
        print("\rEvaluating the model: {}/{} ({:.1f}%)".format(
                  iteration, n_iterations_test,
                  iteration * 100 / n_iterations_test),
              end=" " * 10)
    # Mean of the per-batch values.
    loss_test = np.mean(loss_tests)
    acc_test = np.mean(acc_tests)
    print("\rFinal test accuracy: {:.4f}%  Loss: {:.6f}".format(
        acc_test * 100, loss_test))


# Predictions

Now let's make some predictions! We first fix a few images from the test set, then we start a session, restore the trained model, evaluate `caps2_output` to get the capsule network's output vectors, `decoder_output` to get the reconstructions, and `y_pred` to get the class predictions:

In [None]:
n_samples = 7

data_set = X_test
data_label = y_test
maximum_index = len(data_set)
# Draw n_samples random test indexes (with replacement).
indexes = np.random.randint(low=0,high=maximum_index,size = n_samples )

# Use the same (image_width, image_height, image_channel) layout as the
# placeholder X; the previous reshape used image_width for both spatial axes.
sample_images = data_set[indexes].reshape([-1, image_width, image_height, image_channel])

with tf.Session() as sess:
    saver.restore(sess, checkpoint_path)
    # mask_with_labels keeps its default (False), so the decoder reconstructs
    # from the predicted class; y is fed only because the graph lists it as a
    # dependency. Dropout is disabled.
    caps2_output_value, decoder_output_value, y_pred_value = sess.run(
            [caps2_output, decoder_output, y_pred],
            feed_dict={X: sample_images,
                       y: np.array([], dtype=np.int64),
                       conv1_keep_prob: 1.0})

Note: we feed `y` with an empty array, but TensorFlow will not use it, as explained earlier.

And now let's plot the images and their labels, followed by the corresponding reconstructions and predictions:

In [None]:
# Bring the inputs and the flat decoder outputs back to image layout.
sample_images = sample_images.reshape(-1, image_width, image_height, image_channel)
reconstructions = decoder_output_value.reshape([-1, image_width, image_height, image_channel])

# First row: the sampled input images with their true labels.
# NOTE(review): cmap presumably has no effect if the images are multi-channel — confirm.
plt.figure(figsize=(n_samples * 2, 3))
for index, j in zip(range(n_samples),np.nditer(indexes)):
    plt.subplot(1, n_samples, index + 1)
    plt.imshow(sample_images[index], cmap="binary")
    plt.title("Label:" + str(data_label[j]))
    plt.axis("off")

plt.show()

# Second row: the decoder reconstructions with the predicted labels.
plt.figure(figsize=(n_samples * 2, 3))
for index in range(n_samples):
    plt.subplot(1, n_samples, index + 1)
    plt.title("Predicted:" + str(y_pred_value[index]))
    plt.imshow(reconstructions[index], cmap="binary")
    plt.axis("off")
    
plt.show()

# Interpreting the Output Vectors

Let's tweak the output vectors to see what their pose parameters represent.

First, let's check the shape of the `caps2_output_value` NumPy array:

In [None]:
# Display the shape of the evaluated output-capsule array.
caps2_output_value.shape

Let's create a function that will tweak each of the 32 pose parameters (dimensions) in all output vectors. Each tweaked output vector will be identical to the original output vector, except that one of its pose parameters will be incremented by a value varying from -0.5 to 0.5. By default there will be 11 steps (-0.5, -0.4, ..., +0.4, +0.5). This function will return an array of shape (_tweaked pose parameters_=32, _steps_=11, _batch size_=7, 1, 43, 32, 1):

In [None]:
def tweak_pose_parameters(output_vectors, min=-0.5, max=0.5, n_steps=11):
    """Return copies of `output_vectors` with one pose parameter shifted at a time.

    Args:
        output_vectors: array whose second-to-last axis holds the pose
            parameters, e.g. (batch, 1, n_caps, n_dims, 1).
        min: smallest offset added to a pose parameter.
        max: largest offset added to a pose parameter.
        n_steps: number of evenly spaced offsets between `min` and `max`.

    Returns:
        Array of shape (n_dims, n_steps) + output_vectors.shape. Entry
        [d, s] equals `output_vectors` with offset number `s` added to pose
        parameter `d` of every capsule.
    """
    output_vectors = np.asarray(output_vectors)
    # Take the pose dimension from the data instead of a notebook global, so
    # the function works for any capsule size.
    n_dims = output_vectors.shape[-2]
    steps = np.linspace(min, max, n_steps) # min, ..., max in n_steps values
    pose_parameters = np.arange(n_dims) # 0, 1, ..., n_dims - 1
    tweaks = np.zeros([n_dims, n_steps, 1, 1, 1, n_dims, 1])
    # Pairing the first and sixth axes through the same index array sets
    # tweaks[d, s, 0, 0, 0, d, 0] = steps[s]: only parameter d is shifted in
    # slice d.
    tweaks[pose_parameters, :, 0, 0, 0, pose_parameters, 0] = steps
    output_vectors_expanded = output_vectors[np.newaxis, np.newaxis]
    return tweaks + output_vectors_expanded

Let's compute all the tweaked output vectors and reshape the result to (_parameters_×_steps_×_instances_, 1, 43, 32, 1) so we can feed the array to the decoder:

In [None]:
# Number of offsets applied to each pose parameter.
n_steps = 11

tweaked_vectors = tweak_pose_parameters(caps2_output_value, n_steps=n_steps)
# Merge the (parameter, step, instance) axes into one batch axis so the
# array has the same layout as caps2_output.
tweaked_vectors_reshaped = tweaked_vectors.reshape(
    [-1, 1, caps2_n_caps, caps2_n_dims, 1])

In [None]:
#n_samples = 7

#data_set = X_test
#data_label = y_test
#maximum_index = len(data_set)
#indexes = np.random.randint(low=0,high=maximum_index,size = n_samples )
#sample_images = data_set[indexes].reshape([-1, image_width, image_width, image_channel])

# Repeat the sample labels once per (parameter, step) pair so they line up
# with the flattened batch axis of tweaked_vectors_reshaped.
tweak_labels = np.tile(data_label[indexes], caps2_n_dims * n_steps)

with tf.Session() as sess:
    saver.restore(sess, checkpoint_path)
    # Feed the tweaked vectors directly in place of caps2_output, and mask
    # with the true labels so each reconstruction uses the labelled capsule.
    decoder_output_value = sess.run(
            decoder_output,
            feed_dict={caps2_output: tweaked_vectors_reshaped,
                       mask_with_labels: True,
                       y: tweak_labels})

Let's reshape the decoder's output so we can easily iterate on the output dimension, the tweak steps, and the instances:

In [None]:
# Split the batch axis back into (parameter, step, instance) and restore the image layout.
tweak_reconstructions = decoder_output_value.reshape(
        [caps2_n_dims, n_steps, n_samples, image_width, image_height,image_channel])

In [None]:
# One figure per pose parameter: rows are sample images, columns are tweak steps.
for dim in range(caps2_n_dims):
    print("Tweaking output dimension #{}".format(dim))
    plt.figure(figsize=(n_steps / 1.0, n_samples / 1.0))
    
    for row in range(n_samples):
        for col in range(n_steps):        
            plt.subplot(n_samples, n_steps, row * n_steps + col + 1)
            plt.imshow(tweak_reconstructions[dim, col, row], cmap="binary")
            plt.axis("off")
    plt.show()