In [1]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import numpy as np
import tensorflow as tf

# Clear the default graph first: the graph-level seed set below is attached to
# whichever graph is the default at that moment.
tf.reset_default_graph()

# Single seed shared by NumPy's global RNG and TensorFlow's graph-level RNG.
SEED = 42
np.random.seed(SEED)
tf.set_random_seed(SEED)

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST dataset using /tmp/data/ as the data directory; the cell output
# shows the four gzip archives (train/t10k images and labels) being extracted
# from there. `one_hot` is not passed, so labels are presumably integer class
# ids -- TODO confirm against the installed TensorFlow version.
mnist = input_data.read_data_sets("/tmp/data/")

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [11]:
def capsule_layer(input_layer,kernel_size=6,strides=2,
                  primary_block=32,primary_depth=8,
                  secondary_block=10,secondary_depth=16,
                  routing_rounds=2):
    """Build a primary capsule layer followed by a routed secondary capsule layer.

    Args:
        input_layer: 4-D feature map passed to `tf.layers.conv2d` (default
            channels-last layout, i.e. [batch, height, width, channels]).
        kernel_size: Kernel size of the primary-capsule convolution.
        strides: Stride of the primary-capsule convolution.
        primary_block: Number of capsule maps produced per spatial position.
        primary_depth: Dimensionality of each primary capsule vector.
        secondary_block: Number of secondary (output) capsules.
        secondary_depth: Dimensionality of each secondary capsule vector.
        routing_rounds: Number of routing-by-agreement iterations (>= 1).
            The default of 2 reproduces the original two hand-unrolled rounds.

    Returns:
        Tensor of shape [batch, 1, secondary_block, secondary_depth, 1] holding
        the squashed secondary capsule vectors.

    Raises:
        ValueError: If `routing_rounds` < 1, or if the spatial size of the
            primary convolution output is not statically known.
    """
    if routing_rounds < 1:
        raise ValueError("routing_rounds must be >= 1, got %r" % (routing_rounds,))

    def squash(s, axis=-1, epsilon=1e-7, name=None):
        """Scale vectors along `axis` to length ||s||^2 / (1 + ||s||^2), keeping direction."""
        with tf.name_scope(name, default_name="squash"):
            squared_norm = tf.reduce_sum(tf.square(s), axis=axis,
                                         keep_dims=True)
            # epsilon keeps sqrt (and its gradient) finite for zero vectors.
            safe_norm = tf.sqrt(squared_norm + epsilon)
            squash_factor = squared_norm / (1. + squared_norm)
            unit_vector = s / safe_norm
            return squash_factor * unit_vector

    def primary_capsule(input_layer,kernel_size,strides,primary_block,primary_depth):
        """Convolve, then regroup channels into [batch, n_caps, primary_depth] capsules."""
        conv2_params = {
            "filters": primary_block * primary_depth,
            "kernel_size": kernel_size,
            "strides": strides,
            "padding": "valid",
            "activation": tf.nn.relu
        }

        primary_layer_conv = tf.layers.conv2d(input_layer, **conv2_params)

        # Every spatial position contributes `primary_block` capsules, so the
        # capsule axis must have height * width * primary_block entries.
        # Reshaping to [-1, primary_block, primary_depth] would instead fold the
        # spatial positions into the batch axis and break the batch alignment
        # with the labels downstream.
        conv_shape = primary_layer_conv.get_shape().as_list()
        conv_height, conv_width = conv_shape[1], conv_shape[2]
        if conv_height is None or conv_width is None:
            raise ValueError(
                "capsule_layer needs a statically known spatial size, got %r"
                % (conv_shape,))
        primary_n_caps = conv_height * conv_width * primary_block

        primary_layer_raw = tf.reshape(primary_layer_conv,
                                       [-1, primary_n_caps, primary_depth])
        primary_layer = squash(primary_layer_raw)
        return primary_layer

    def secondary_capsule(primary_layer,secondary_block,secondary_depth):
        """Predict secondary capsules from primary ones and combine them by routing."""
        init_sigma = 0.01
        batch_size = tf.shape(primary_layer)[0]
        primary_n_caps = int(primary_layer.shape[1])
        primary_depth = int(primary_layer.shape[2])

        # One [secondary_depth x primary_depth] transform per
        # (primary capsule, secondary capsule) pair, shared across the batch.
        W_init = tf.random_normal(
            shape=(1, primary_n_caps, secondary_block, secondary_depth, primary_depth),
            stddev=init_sigma, dtype=tf.float32)
        W = tf.Variable(W_init)
        W_tiled = tf.tile(W, [batch_size, 1, 1, 1, 1])

        # [batch, n_caps, depth] -> [batch, n_caps, secondary_block, depth, 1]
        caps1_output_expanded = tf.expand_dims(primary_layer, -1)
        caps1_output_tile = tf.expand_dims(caps1_output_expanded, 2)
        caps1_output_tiled = tf.tile(caps1_output_tile, [1, 1, secondary_block, 1, 1])

        # [batch, n_caps, secondary_block, secondary_depth, 1]
        caps2_predicted = tf.matmul(W_tiled, caps1_output_tiled)

        # Routing by agreement: logits start at zero (uniform routing) and are
        # increased wherever a prediction agrees with the current output.
        raw_weights = tf.zeros([batch_size, primary_n_caps, secondary_block, 1, 1],dtype=np.float32)
        caps2_output = None
        for round_index in range(routing_rounds):
            routing_weights = tf.nn.softmax(raw_weights, dim=2)
            weighted_predictions = tf.multiply(routing_weights, caps2_predicted)
            weighted_sum = tf.reduce_sum(weighted_predictions, axis=1, keep_dims=True)
            caps2_output = squash(weighted_sum, axis=-2)

            # The logits only need updating if another round will consume them.
            if round_index < routing_rounds - 1:
                caps2_output_tiled = tf.tile(caps2_output, [1, primary_n_caps, 1, 1, 1])
                # Scalar product between each prediction and the current output.
                agreement = tf.matmul(caps2_predicted, caps2_output_tiled, transpose_a=True)
                raw_weights = tf.add(raw_weights, agreement)
        return caps2_output

    primary_layer = primary_capsule(input_layer,kernel_size,strides,primary_block,primary_depth)
    secondary_layer = secondary_capsule(primary_layer,secondary_block,secondary_depth)
    return secondary_layer

In [12]:
def safe_norm(s, axis=-1, epsilon=1e-7, keep_dims=False, name=None):
    """Return sqrt(sum(s**2) + epsilon) reduced along `axis`.

    Args:
        s: Input tensor.
        axis: Axis (or axes) to reduce over.
        epsilon: Small constant added under the square root.
        keep_dims: Forwarded to `tf.reduce_sum`; keeps the reduced axis with size 1.
        name: Optional name scope; defaults to "safe_norm".
    """
    with tf.name_scope(name, default_name="safe_norm"):
        sum_of_squares = tf.reduce_sum(
            tf.square(s),
            axis=axis,
            keep_dims=keep_dims,
        )
        shifted = sum_of_squares + epsilon
        return tf.sqrt(shifted)

In [20]:
from math import sqrt

# Each row of mnist.train.images is one flattened image; recover the side length.
# sqrt() returns a float, but tensor dimensions must be integers, so convert
# explicitly and make sure no pixels are lost in the process.
n_pixels = int(mnist.train.images.shape[1])
image_width = int(round(sqrt(n_pixels)))
if image_width * image_width != n_pixels:
    raise ValueError("expected square images, got %d pixels per image" % n_pixels)
image_height = image_width
image_channel = 1
n_classes = 10

In [14]:
# Input images in [batch, height, width, channels] order. int() guards against
# the spatial sizes having been computed as floats (e.g. via math.sqrt).
X = tf.placeholder(shape=[None, int(image_height), int(image_width), image_channel],
                   dtype=tf.float32, name="X")

In [15]:
# Plain convolutional feature extractor applied to the input images.
# NOTE: keep the statement order -- op-level random seeds depend on the order
# in which ops are added to the graph.
conv1_raw = tf.layers.conv2d(
    X,
    filters=256,
    kernel_size=9,
    strides=1,
    padding="valid",
    activation=tf.nn.relu,
)
# Keep probability for dropout; it has no default, so it must be fed on every run.
conv1_keep_prob = tf.placeholder(tf.float32)
conv1 = tf.nn.dropout(conv1_raw, keep_prob=conv1_keep_prob)

In [16]:
# Stack the primary/secondary capsule layers on top of the convolutional features.
capsule1 = capsule_layer(
    conv1,
    kernel_size=6,
    strides=2,
    primary_block=32,
    primary_depth=8,
    secondary_block=10,
    secondary_depth=16,
)

In [None]:
# Alias for the final capsule layer; the prediction and loss cells below read
# `capsule_output`, so only this line needs to change if more layers are added.
capsule_output = capsule1

In [18]:

# capsule_layer returns [N, 1, 10, 16, 1]; taking the norm over axis -2 (the
# capsule vector) without keep_dims gives one length per class: [N, 1, 10, 1].
y_proba = safe_norm(capsule_output, axis=-2)
# Index of the longest capsule along the class axis -> [N, 1, 1].
y_proba_argmax = tf.argmax(y_proba, axis=2)
# Drop the two singleton axes to get one predicted class id per row: [N].
y_pred = tf.squeeze(y_proba_argmax, axis=[1,2])

In [19]:
# Ground-truth labels as a 1-D int64 vector; they are one-hot encoded with
# tf.one_hot in the loss cell, so they are presumably integer class ids.
y = tf.placeholder(shape=[None], dtype=tf.int64, name="y")

In [22]:
# Margin-loss hyperparameters: target length for the present class, maximum
# tolerated length for absent classes, and the weight of the absent-class term.
m_plus, m_minus, lambda_ = 0.9, 0.1, 0.5

# One-hot targets: T[i, k] is 1 when example i belongs to class k.
T = tf.one_hot(y, depth=n_classes, name="T")

# Length of every output capsule vector (reduced axis kept with size 1).
caps2_output_norm = safe_norm(capsule_output, axis=-2, keep_dims=True)

# Penalty when a present class's capsule is shorter than m_plus.
present_error_raw = tf.square(tf.maximum(0., m_plus - caps2_output_norm))
present_error = tf.reshape(present_error_raw, shape=[-1, n_classes])

# Penalty when an absent class's capsule is longer than m_minus.
absent_error_raw = tf.square(tf.maximum(0., caps2_output_norm - m_minus))
absent_error = tf.reshape(absent_error_raw, shape=[-1, n_classes])

# Per-class loss, summed over classes and averaged over the batch.
L = tf.add(
    T * present_error,
    lambda_ * (1.0 - T) * absent_error,
    name="L",
)
per_example_loss = tf.reduce_sum(L, axis=1)
margin_loss = tf.reduce_mean(per_example_loss, name="margin_loss")