In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm
from datetime import datetime

%load_ext tensorboard

2024-02-21 20:04:46.896327: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

SystemError: GPU device not found

In [3]:
eps = 1e-7
m_plus = 0.9
m_minus = 0.1
lambda_ = 0.5
alpha = 0.0005
epochs = 50
no_of_secondary_capsules = 10

optimizer = tf.keras.optimizers.Adam()

In [4]:
params = {
    "no_of_conv_kernels": 256,
    "no_of_primary_capsules": 32,
    "no_of_secondary_capsules": 10,
    "primary_capsule_vector": 8,
    "secondary_capsule_vector": 16,
    "r": 3
}

In [5]:
checkpoint_path = './logs/model/capsule'

stamp = datetime.now().strftime("%Y%m%d-%H%M%S")

logdir = './logs/func/%s' % stamp
writer = tf.summary.create_file_writer(logdir)

scalar_logdir = './logs/scalars/%s' % stamp
file_writer = tf.summary.create_file_writer(scalar_logdir + "/metrics")

In [16]:
train_data = pd.read_csv("../data/fashion-mnist_train.csv")
test_data = pd.read_csv("../data/fashion-mnist_test.csv")

y_train = train_data.label
X_train = train_data.drop(columns=['label'])

y_test = test_data.label
X_test = test_data.drop(columns=['label'])

X_train = np.reshape(np.array(X_train), (60000, 28, 28, 1))
X_train = X_train.astype('float32') / 255.
X_test = np.reshape(np.array(X_test), (10000, 28, 28, 1))
X_test = X_test.astype('float32') / 255.

X_train = tf.cast(X_train, dtype=tf.float32)
X_test = tf.cast(X_test, dtype=tf.float32)

print(X_train.shape)
print(y_train.shape)

(60000, 28, 28, 1)
(60000,)


In [17]:
testing_dataset_size = X_test.shape[0]
training_dataset_size = X_train.shape[0]

In [18]:
dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
dataset = dataset.shuffle(buffer_size=len(dataset), reshuffle_each_iteration=True)
dataset = dataset.batch(batch_size=64)

In [21]:
testing = tf.data.Dataset.from_tensor_slices((X_test, y_test))
testing = dataset.batch(batch_size=64)

In [25]:
class CapsNet(tf.keras.Model):
    def __init__(self, no_of_conv_kernels, no_of_primary_capsules, primary_capsule_vector,
                    no_of_secondary_capsules, secondary_capsule_vector, r):
        super(CapsNet, self).__init__()
        self.no_of_conv_kernels = no_of_conv_kernels
        self.no_of_primary_capsules = no_of_primary_capsules
        self.primary_capsule_vector = primary_capsule_vector
        self.no_of_secondary_capsules = no_of_secondary_capsules
        self.secondary_capsule_vector = secondary_capsule_vector
        self.r = r
        
        with tf.name_scope("Variables") as scope:
            self.convolution = tf.keras.layers.Conv2D(self.no_of_conv_kernels, [9,9], strides=[1,1],
                                                     name="ConvolutionLayer", activation='relu')
            self.primary_capsule = tf.keras.layers.Conv2D(self.no_of_primary_capsules * self.primary_capsule_vector,
                                                         [9,9], strides=[2,2], name="PrimaryCapsule")
            self.w = tf.Variable(tf.random_normal_initializer()(shape=[1, 1152, self.no_of_secondary_capsules,
                                                                      self.secondary_capsule_vector, self.primary_capsule_vector,
                                                                      ]), dtype=tf.float32, name="PoseEstimation", trainable=True)
            self.dense_1 = tf.keras.layers.Dense(units=512, activation='relu')
            self.dense_2 = tf.keras.layers.Dense(units=1024, activation='relu')
            self.dense_3 = tf.keras.layers.Dense(units=784, activation='sigmoid', dtype='float32')
    def build(self, input_shape):
        pass
    
    def squash(self, s):
        with tf.name_scope("SquashFunction") as scope:
            s_norm = tf.norm(s, axis=-1, keepdims=True)
            return tf.square(s_norm)/(1+tf.square(s_norm)) * s/(s_norm + eps)
    
    @tf.function
    def call(self, inputs):
        input_x, y = inputs
        
        x = self.convolution(input_x)
        x = self.primary_capsule(x)
        
        with tf.name_scope("CapsuleFormation") as scope:
            u = tf.reshape(x, (-1, self.no_of_primary_capsules*x.shape[1]*x.shape[2], 8))
            u = tf.expand_dims(u, axis=-2)
            u = tf.expand_dims(u, axis=-1)
            u_hat = tf.matmul(self.w, u)
            u_hat = tf.squeeze(u_hat, [4])
            
        with tf.name_scope("DynamicRouting") as scope:
            b = tf.zeros((input_x.shape[0], 1152, self.no_of_secondary_capsules, 1))
            for i in range(self.r):
                c = tf.nn.softmax(b, axis=-2)
                s = tf.reduce_sum(tf.multiply(c, u_hat), axis=1, keepdims=True)
                v = self.squash(s)
                agreement = tf.squeeze(tf.matmul(tf.expand_dims(u_hat, axis=-1), tf.expand_dims(v, axis=-1),
                                                transpose_a=True), [4])
                b += agreement
                
        with tf.name_scope("Masking") as scope:
            y = tf.expand_dims(y, axis=-1)
            y = tf.expand_dims(y, axis=1)
            mask = tf.cast(y, dtype=tf.float32)
            v_masked = tf.multiply(mask, v)
            
        with tf.name_scope("Reconstruction") as scope:
            v_ = tf.reshape(v_masked, [-1, self.no_of_secondary_capsules*self.secondary_capsule_vector])
            reconstructed_image = self.dense_1(v_)
            reconstructed_image = self.dense_2(reconstructed_image)
            reconstructed_image = self.dense_3(reconstructed_image)
            
        return v, reconstructed_image
    
    @tf.function
    def predict_capsule_output(self, inputs):
        x = self.convolution(inputs)
        x = self.primary_capsule(x)
        
        with tf.name_scope("CapsuleFormation") as scope:
            u = tf.reshape(x, (-1, self.no_of_primary_capsules*x.shape[1]*x.shape[2], 8))
            u = tf.expand_dims(u, axis=-2)
            u = tf.expand_dims(u, axis=-1)
            u_hat = tf.matmul(self.w, u)
            u_hat = tf.squeeze(u_hat, [4])
            
        with tf.name_scope("DynamicRouting") as scope:
            b = tf.zeros((inputs.shape[0], 1152, self.no_of_secondary_capsules, 1))
            for i in range(self.r):
                c = tf.nn.softmax(b, axis=-2)
                s = tf.reduce_sum(tf.multiply(c, u_hat), axis=1, keepdims=True)
                v = self.squash(s)
                agreement = tf.squeeze(tf.matmul(tf.expand_dims(u_hat, axis=-1), tf.expand_dims(v, axis=-1),
                                                transpose_a=True), [4])
                b += agreement
                
        return v
    
    @tf.function
    def regeneate_image(self, inputs):
        with tf.name_scope("Reconstruction") as scope:
            v_ = tf.reshape(inputs, [-1, self.no_of_secondary_capsules*self.secondary_capsule_vector])
            reconstructed_image = self.dense_1(v_)
            reconstructed_image = self.dense_2(reconstructed_image)
            reconstructed_image = self.dense_3(reconstructed_image)
        return reconstructed_image

In [26]:
tf.summary.trace_on(graph=True, profiler=True)



In [28]:
model = CapsNet(**params)

In [29]:
def safe_norm(v, axis=-1, eps=1e-7):
    v_ = tf.reduce_sum(tf.square(v), axis=axis, keepdims=True)
    return tf.sqrt(v_ + eps)

In [30]:
def loss_function(v, reconstructed_image, y, y_image):
    prediction = safe_norm(v)
    prediction = tf.reshape(prediction, [-1, no_of_secondary_capsules])
    
    left_margin = tf.square(tf.maximum(0.0, m_plus - prediction))
    right_margin = tf.square(tf.maximum(0.0, prediction - m_minus))
    
    l = tf.add(y*left_margin, lambda_*(1.0-y)*right_margin)
    margin_loss = tf.reduce_mean(tf.reduce_sum(l, axis=-1))
    
    y_image_flat = tf.reshape(y_image, [-1, 784])
    reconstuction_loss = tf.reduce_mean(tf.square(y_image_flat - reconstructed_image))
    
    loss = tf.add(margin_loss, alpha*reconstuction_loss)
    
    return loss

In [31]:
def train(x, y):
    y_one_hot = tf.one_hot(y, depth=10)
    with tf.GradientTape() as tape:
        v, reconstructed_image = model([x, y_one_hot])
        loss = loss_function(v, reconstructed_image, y_one_hot, x)
    grad = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grad, model.trainable_variables))
    return loss

In [32]:
_ = train(X_train[:32], y_train[:32])
with writer.as_default():
    tf.summary.trace_export(name="my_func_trace", step=0, profiler_outdir=logdir)

Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Instructions for updating:
`tf.python.eager.profiler` has deprecated, use `tf.profiler` instead.
Instructions for updating:
`tf.python.eager.profiler` has deprecated, use `tf.profiler` instead.


2024-02-21 21:05:24.129652: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:70] Profiler session collecting data.
2024-02-21 21:05:24.154482: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:131] Profiler session tear down.


In [33]:
tf.summary.trace_off()

In [34]:
model.summary()

Model: "caps_net"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 ConvolutionLayer (Conv2D)   multiple                  20992     
                                                                 
 PrimaryCapsule (Conv2D)     multiple                  5308672   
                                                                 
 dense (Dense)               multiple                  82432     
                                                                 
 dense_1 (Dense)             multiple                  525312    
                                                                 
 dense_2 (Dense)             multiple                  803600    
                                                                 
Total params: 8215568 (31.34 MB)
Trainable params: 8215568 (31.34 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [35]:
def predict(model, x):
    pred = safe_norm(model.predict_capsule_output(x))
    pred = tf.squeeze(pred, [1])
    return np.argmax(pred, axis=1)[:,0]

In [36]:
checkpoint = tf.train.Checkpoint(model=model)

In [37]:
%tensorboard --logdir ./logs

ERROR: Could not find `tensorboard`. Please ensure that your PATH
contains an executable `tensorboard` program, or explicitly specify
the path to a TensorBoard binary by setting the `TENSORBOARD_BINARY`
environment variable.

In [None]:
losses = []
accuracy = []
for i in range(1, 1+1, 1):
    loss = 0
    with tqdm(total=len(dataset)) as pbar:
        description = "Epoch " + str(i) + "/" + str(epochs)
        pbar.set_description_str(description)
        
        for X_batch, y_batch in dataset:
            loss += train(X_batch, y_batch)
            pbar.update(1)
            
        loss /= len(dataset)
        losses.append(loss.numpy())
        
        training_sum = 0
        
        print_statement = "Loss: " + str(loss.numpy()) + " Evaluating Accuracy ..."
        pbar.set_postfix_str(print_statement)
        
        for X_batch, y_batch in dataset:
            training_sum += sum(predict(model, X_batch) == y_batch.numpy())
        accuracy.append(training_sum/training_dataset_size)
        
        with file_writer.as_default():
            tf.summary.scalar('Loss', data=loss.numpy(), step=i)
            tf.summary.scalar('Accuracy', data=accuracy[-1], step=i)
            
        print_statement = "Loss: " + str(loss.numpy()) + " Accuracy: " + str(accuracy[-1])
        
        if i%10 == 0:
            print_statement += ' Checkpoint saved'
            checkpoint.save(checkpoint_path)
            
        pbar.set_postfix_str(print_statement)

Epoch 1/50: 100%|█| 938/938 [21:25<00:00,  1.18s/it, Loss: 0.13142928 Evaluating

In [None]:
test_sum = 0
for X_batch, y_batch in testing:
    test_sum += sum(predict(model, X_batch) == y_batch.numpy())
print("Accuracy on test data is: " + str(test_sum/training_dataset_size))