In [1]:
import os
import sys
import pathlib
import random

# TF_CPP_MIN_LOG_LEVEL controls TensorFlow's native (C++) log verbosity.
# It has to be in the environment *before* tensorflow is imported; setting it
# afterwards (as this cell used to do) is too late to silence start-up logs.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import git
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist

# Resolve the root of the enclosing git working tree and put it on sys.path
# so that the project-local `libs` package below can be imported.
PROJ_ROOT_PATH = pathlib.Path(git.Repo('.', search_parent_directories=True).working_tree_dir)
PROJ_ROOT = str(PROJ_ROOT_PATH)
if PROJ_ROOT not in sys.path:
    sys.path.append(PROJ_ROOT)

from libs import utils, mnist32_cnn
from libs.constants import model_seeds
from libs.errmatmul import matmul_ERR, N_THREADS_PER_BLOCK
from libs.fitnessfns import NO_OF_CLASSES

In [2]:
# Enable memory growth on every visible GPU so TensorFlow allocates GPU memory
# on demand instead of reserving it all up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # With no GPU present the loop body simply never runs.
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    # Best effort: report the problem and carry on with the default behaviour.
    print(err)

In [3]:
# Load the Fashion-MNIST train/test splits through Keras
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Convert to float32, divide by 255 and append a trailing channel axis
train_images = (train_images.astype('float32') / 255.0)[..., np.newaxis]
test_images = (test_images.astype('float32') / 255.0)[..., np.newaxis]

In [4]:
# Select which saved model to load: the model family name and the first seed
# from the project-wide seed list. Both are combined into the file name below.
model_type = "fashion-cnn2_1024"
model_seed = model_seeds[0]

In [5]:
# Saved models live at <repo root>/models/<model_type>/<model_type>-<seed>.h5
model_instance = f"{model_type}-{model_seed!s}"
model_filename = f"{model_instance}.h5"
model_folder = PROJ_ROOT_PATH / "models" / model_type
model_file = str(model_folder / model_filename)
# Load model
model = tf.keras.models.load_model(model_file)

In [6]:
# model.summary()

In [7]:
# Work on the first 64 test samples only
images, labels = test_images[:64], test_labels[:64]

In [8]:
# Reference activations from the Keras model, used by the sanity checks below.
# Each layer is applied once to the previous layer's output instead of
# re-running the whole prefix of the network for every intermediate result.
# Layer indices 2, 5 and 8 are skipped on purpose: they are the dropout layers,
# which are disabled during inference.
tf_c0_out = model.layers[0](images)            # convolution c0
tf_p0_out = model.layers[1](tf_c0_out)         # max pooling p0
tf_c1_out = model.layers[3](tf_p0_out)         # convolution c1
tf_p1_out = model.layers[4](tf_c1_out)         # max pooling p1
tf_flatten_out = model.layers[6](tf_p1_out)    # flatten
tf_h0_out = model.layers[7](tf_flatten_out)    # hidden layer h0
tf_op_out = model.layers[9](tf_h0_out)         # output layer op

In [9]:
# tf_c0_out.shape

# tf_p0_out.shape

# tf_c1_out.shape

# tf_p1_out.shape

# tf_flatten_out.shape

# tf_h0_out.shape

# tf_op_out.shape

In [10]:
# model(images) # = tf_op_out

In [11]:
# Manual inference configuration
## Error profiles: every layer is handed the same all-zero (20000 x 32) float32
## array, which routes its multiplication through matmul_ERR.
## Set a layer's profile to None to use the plain TensorFlow matmul instead.
## NOTE(review): the four names alias ONE array; this is fine only as long as
## matmul_ERR does not modify it in place - confirm.
dummy_error_profile = np.zeros((20_000,32), dtype="float32")
error_profile_c0 = dummy_error_profile
error_profile_c1 = dummy_error_profile
error_profile_h0 = dummy_error_profile
error_profile_op = dummy_error_profile
ERR_PARAM_TF = None

## Shuffle orders: one random permutation of the output units / filters of each
## layer. Set a layer's shuffle order to None to disable shuffling for it.
## A dedicated, seeded generator keeps the permutations identical across
## "Restart & Run All" without touching NumPy's global random state.
SHUFFLE_SEED = 0
shuffle_rng = np.random.default_rng(SHUFFLE_SEED)
shuffle_order_c0 = shuffle_rng.permutation(model.get_layer("c0").weights[0].shape[-1])
shuffle_order_c1 = shuffle_rng.permutation(model.get_layer("c1").weights[0].shape[-1])
shuffle_order_h0 = shuffle_rng.permutation(model.get_layer("h0").weights[0].shape[-1])
shuffle_order_op = shuffle_rng.permutation(model.get_layer("op").weights[0].shape[-1])

In [12]:
# Pull the two weight tensors (kernel/weight matrix, then bias) out of each
# named layer of the loaded model
(
    (c0_kernels, c0_biases),
    (c1_kernels, c1_biases),
    (h0_weights, h0_biases),
    (op_weights, op_biases),
) = (model.get_layer(layer_name).weights for layer_name in ("c0", "c1", "h0", "op"))

In [13]:
# Batch fed to the manual inference cells below; here it is simply the
# 64-sample slice selected earlier (no copy is made).
b_images = images
b_labels = labels

In [14]:
#####################################################################################
# L0: CONVOLUTION LAYER
# The convolution is computed as a matrix product: the flattened filter kernels
# (one row per filter) times the flattened image patches (one column per
# output pixel), one image at a time.
## L0.A: Get dimension values
#### kernel height, kernel width, no of channels in input image, no of filter kernels
kr_ht, kr_wt, no_ch, no_kr = c0_kernels.shape

no_im = b_images.shape[0]

# the kernels must match the channel count of the incoming images
assert no_ch == b_images.shape[-1]
assert no_im == len(b_labels)

### input image dimensions
im_ht = b_images.shape[1]
im_wt = b_images.shape[2]

### convolution layer output dimensions (padding=same)
y_ht = im_ht
y_wt = im_wt

### patch dimensions
no_of_patches = y_ht * y_wt
patch_len     = kr_ht * kr_wt * no_ch

## L0.B: Extract Images Patches
patches = tf.image.extract_patches(images=b_images,
                                 sizes=[1, kr_ht, kr_wt, 1],
                                 strides=[1, 1, 1, 1],
                                 rates=[1, 1, 1, 1],
                                 padding='SAME')
### flatten patches
flat_patches = tf.reshape(patches, (no_im, no_of_patches, patch_len))
### transpose for matrix multiplication -> (no_im, patch_len, no_of_patches)
flat_patches = tf.transpose(flat_patches, (0,2,1))

## L0.C: Flatten filter kernels
### first reorder kernels by no. of output-kernels
flat_kernels = tf.transpose(c0_kernels, perm=(3,0,1,2))
### then reshape to required matrix shape
flat_kernels = tf.reshape(flat_kernels, (no_kr, patch_len))

## L0.D: Perform Matrix Multiplication
### Everything below up to the loop is identical for every image, so it is
### computed once here instead of once per image.
BLOCK_HEIGHT = N_THREADS_PER_BLOCK # no. of threads per block
BLOCK_WIDTH = kr_ht*kr_wt # totcols is always (going to be) a multiple of BLOCK_WIDTH
BATCH_BLOCK_SIZE = 32 # user-defined: NOT the actual batch block size in this context.
                        # simply the tile block width of matB

### pad the patch matrices on the right for a good matrix shape.
### The amount is always in 1..BATCH_BLOCK_SIZE (never 0), which keeps the
### [:, :-no_cols_to_pad] slice below valid.
no_cols_to_pad = BATCH_BLOCK_SIZE-(no_of_patches%BATCH_BLOCK_SIZE)
padded_flat_patches = tf.pad(flat_patches,
                             [[0, 0],               # images
                              [0, 0],               # padding above and below
                              [0, no_cols_to_pad]], # padding left and right
                             mode="CONSTANT",
                             constant_values=0.0)

### is shuffling required
if shuffle_order_c0 is not None:
    # shuffle filter order matrix
    shuffled_kernels = tf.gather(flat_kernels, shuffle_order_c0)
    # inverse permutation: restores the original filter order afterwards
    unshuffle_order_c0 = tf.math.invert_permutation(shuffle_order_c0)
else:
    shuffled_kernels = flat_kernels
    unshuffle_order_c0 = None

conv_mul_out_list = []
### for each image in batch
for im in range(no_im):
    padded_single_im_patch = padded_flat_patches[im]

    # is error injection required
    if error_profile_c0 is not None:
        shuffled_conv_mul_out = matmul_ERR(shuffled_kernels,
                                           padded_single_im_patch,
                                           BLOCK_HEIGHT,
                                           BLOCK_WIDTH,
                                           BATCH_BLOCK_SIZE,
                                           ERR_PROFILE=error_profile_c0,
                                           ERR_PARAM_TF=ERR_PARAM_TF,)[:,:-no_cols_to_pad]
    else:
        shuffled_conv_mul_out = tf.matmul(shuffled_kernels, padded_single_im_patch)[:,:-no_cols_to_pad]

    # was the kernel matrix shuffled ?
    if unshuffle_order_c0 is not None:
        # unshuffle conv_out
        conv_mul_out = tf.gather(shuffled_conv_mul_out, unshuffle_order_c0)
    else:
        conv_mul_out = shuffled_conv_mul_out
    conv_mul_out_list.append(conv_mul_out)
    # this completes the matrix multiplication equivalent of convolution of *ONE* image in the batch of image

conv_out = tf.stack(conv_mul_out_list)
conv_out = tf.transpose(conv_out, (0,2,1)) # rearrange channel order
conv_out = tf.reshape(conv_out, (no_im, y_ht,y_wt, no_kr)) # reshape to filter output shape

## Add bias
conv_out = tf.nn.bias_add(conv_out, c0_biases)
## ReLU
conv0_out = tf.nn.relu(conv_out)
#####################################################################################

In [15]:
# sanity check for layer c0: largest absolute element-wise difference between
# the Keras layer output and the manual matrix-multiplication version
tf.reduce_max(tf.abs(tf_c0_out-conv0_out))

<tf.Tensor: shape=(), dtype=float32, numpy=0.0012466013>

In [16]:
# L1: MAX POOLING LAYER
## 2x2 window moved in steps of 2 over height and width; batch and depth untouched
pool0_out = tf.nn.max_pool(input=conv0_out,
                           ksize=(1, 2, 2, 1),   #(batch_size, height, width, depth)
                           strides=(1, 2, 2, 1), #(batch_size, height, width, depth)
                           padding='VALID')
#####################################################################################

In [17]:
# sanity check for layer p0: largest absolute element-wise difference between
# the Keras pooling output and the manual pooling output
tf.reduce_max(tf.abs(tf_p0_out-pool0_out))

<tf.Tensor: shape=(), dtype=float32, numpy=0.0012466013>

In [18]:
#####################################################################################
# L2: DROPOUT LAYER (Disabled in Inference)
# L3: CONVOLUTION LAYER
# The convolution is computed as a matrix product: the flattened filter kernels
# (one row per filter) times the flattened input patches (one column per
# output pixel), one image at a time.
## L3.A: Get dimension values
#### kernel height, kernel width, no of channels in input image, no of filter kernels
kr_ht, kr_wt, no_ch, no_kr = c1_kernels.shape

no_im = pool0_out.shape[0]

# the kernels must match the channel count of the incoming feature maps
assert no_ch == pool0_out.shape[-1]
assert no_im == len(b_labels)

### input image dimensions
im_ht = pool0_out.shape[1]
im_wt = pool0_out.shape[2]

### convolution layer output dimensions (padding=same)
y_ht = im_ht
y_wt = im_wt

### patch dimensions
no_of_patches = y_ht * y_wt
patch_len     = kr_ht * kr_wt * no_ch

## L3.B: Extract Images Patches
patches = tf.image.extract_patches(images=pool0_out,
                                 sizes=[1, kr_ht, kr_wt, 1],
                                 strides=[1, 1, 1, 1],
                                 rates=[1, 1, 1, 1],
                                 padding='SAME')
### flatten patches
flat_patches = tf.reshape(patches, (no_im, no_of_patches, patch_len))
### transpose for matrix multiplication -> (no_im, patch_len, no_of_patches)
flat_patches = tf.transpose(flat_patches, (0,2,1))

## L3.C: Flatten filter kernels
### first reorder kernels by no. of output-kernels
flat_kernels = tf.transpose(c1_kernels, perm=(3,0,1,2))
### then reshape to required matrix shape
flat_kernels = tf.reshape(flat_kernels, (no_kr, patch_len))

## L3.D: Perform Matrix Multiplication
### Everything below up to the loop is identical for every image, so it is
### computed once here instead of once per image.
BLOCK_HEIGHT = N_THREADS_PER_BLOCK # no. of threads per block
BLOCK_WIDTH = kr_ht*kr_wt # totcols is always (going to be) a multiple of BLOCK_WIDTH
BATCH_BLOCK_SIZE = 32 # user-defined: NOT the actual batch block size in this context.
                        # simply the tile block width of matB

### pad the patch matrices on the right for a good matrix shape.
### The amount is always in 1..BATCH_BLOCK_SIZE (never 0), which keeps the
### [:, :-no_cols_to_pad] slice below valid.
no_cols_to_pad = BATCH_BLOCK_SIZE-(no_of_patches%BATCH_BLOCK_SIZE)
padded_flat_patches = tf.pad(flat_patches,
                             [[0, 0],               # images
                              [0, 0],               # padding above and below
                              [0, no_cols_to_pad]], # padding left and right
                             mode="CONSTANT",
                             constant_values=0.0)

### is shuffling required
if shuffle_order_c1 is not None:
    # shuffle filter order matrix
    shuffled_kernels = tf.gather(flat_kernels, shuffle_order_c1)
    # inverse permutation: restores the original filter order afterwards
    unshuffle_order_c1 = tf.math.invert_permutation(shuffle_order_c1)
else:
    shuffled_kernels = flat_kernels
    unshuffle_order_c1 = None

conv_mul_out_list = []
### for each image in batch
for im in range(no_im):
    padded_single_im_patch = padded_flat_patches[im]

    # is error injection required
    if error_profile_c1 is not None:
        shuffled_conv_mul_out = matmul_ERR(shuffled_kernels,
                                           padded_single_im_patch,
                                           BLOCK_HEIGHT,
                                           BLOCK_WIDTH,
                                           BATCH_BLOCK_SIZE,
                                           ERR_PROFILE=error_profile_c1,
                                           ERR_PARAM_TF=ERR_PARAM_TF,)[:,:-no_cols_to_pad]
    else:
        shuffled_conv_mul_out = tf.matmul(shuffled_kernels, padded_single_im_patch)[:,:-no_cols_to_pad]

    # was the kernel matrix shuffled ?
    if unshuffle_order_c1 is not None:
        # unshuffle conv_out
        conv_mul_out = tf.gather(shuffled_conv_mul_out, unshuffle_order_c1)
    else:
        conv_mul_out = shuffled_conv_mul_out
    conv_mul_out_list.append(conv_mul_out)
    # this completes the matrix multiplication equivalent of convolution of *ONE* image in the batch of image

conv_out = tf.stack(conv_mul_out_list)
conv_out = tf.transpose(conv_out, (0,2,1)) # rearrange channel order
conv_out = tf.reshape(conv_out, (no_im, y_ht,y_wt, no_kr)) # reshape to filter output shape

## Add bias
conv_out = tf.nn.bias_add(conv_out, c1_biases)
## ReLU
conv1_out = tf.nn.relu(conv_out)
#####################################################################################

In [19]:
# sanity check for layer c1: largest absolute element-wise difference between
# the Keras layer output and the manual matrix-multiplication version
tf.reduce_max(tf.abs(tf_c1_out-conv1_out))

<tf.Tensor: shape=(), dtype=float32, numpy=0.005921334>

In [20]:
# L4: MAX POOLING LAYER
## 2x2 window moved in steps of 2 over height and width; batch and depth untouched
pool1_out = tf.nn.max_pool(input=conv1_out,
                           ksize=(1, 2, 2, 1),   #(batch_size, height, width, depth)
                           strides=(1, 2, 2, 1), #(batch_size, height, width, depth)
                           padding='VALID')
#####################################################################################

In [21]:
# sanity check for layer p1: largest absolute element-wise difference between
# the Keras pooling output and the manual pooling output
tf.reduce_max(tf.abs(tf_p1_out-pool1_out))

<tf.Tensor: shape=(), dtype=float32, numpy=0.0045457482>

In [22]:
# L5: DROPOUT LAYER (Disabled in Inference)
# L6: FLATTEN LAYER
## The batch size is read from pool1_out itself, so this cell does not depend
## on a `no_im` value left behind by the convolution cell.
flat_out = tf.reshape(pool1_out, (pool1_out.shape[0], -1) ) #[batch_size, flat_vec_size]
#####################################################################################

In [23]:
# sanity check for layer flatten: largest absolute element-wise difference
# between the Keras flatten output and the manually reshaped tensor
tf.reduce_max(tf.abs(tf_flatten_out-flat_out))

<tf.Tensor: shape=(), dtype=float32, numpy=0.0045457482>

In [24]:
# L7: HIDDEN LAYER 0
## Computed as (weights^T x inputs^T), so the batch runs along the columns.
## transpose weight matrix
h0_weights_tr = tf.transpose(h0_weights, perm=[1,0]) #[no_of_weights, flat_vec_size]
## transpose input vectors
h0_in = tf.transpose(flat_out, perm=[1,0]) #[flat_vec_size, batch_size]

## optionally permute the rows of the weight matrix
if shuffle_order_h0 is None:
    shuffled_weights = h0_weights_tr
else:
    shuffled_weights = tf.gather(h0_weights_tr, shuffle_order_h0)

## multiply: plain matmul, or the error-injecting matmul when a profile is set
if error_profile_h0 is None:
    shuffled_mult_out = tf.linalg.matmul(shuffled_weights, h0_in)
else:
    BLOCK_HEIGHT = N_THREADS_PER_BLOCK # no. of threads per block
    BLOCK_WIDTH = 32 # totcols is always (going to be) a multiple of BLOCK_WIDTH
    BATCH_BLOCK_SIZE = 32 # in reality, inference is always one image at a time.
                         # However, batch inference is used here for speedup
    shuffled_mult_out = matmul_ERR(shuffled_weights,
                                   h0_in,
                                   BLOCK_HEIGHT,
                                   BLOCK_WIDTH,
                                   BATCH_BLOCK_SIZE,
                                   ERR_PROFILE=error_profile_h0,
                                   ERR_PARAM_TF=ERR_PARAM_TF)

## undo the row permutation, if one was applied
if shuffle_order_h0 is None:
    h0_mult_out = shuffled_mult_out
else:
    # scatter[shuffle_order[i]] = i, i.e. the inverse permutation
    indices = tf.expand_dims(shuffle_order_h0, axis=1)
    updates = tf.range(tf.size(indices))
    shape = shuffle_order_h0.shape
    scatter = tf.scatter_nd(indices, updates, shape)
    h0_mult_out = tf.gather(shuffled_mult_out, scatter)

# Add bias (as a column, broadcast over the batch) and apply ReLU
h0_bout = tf.add(h0_mult_out, tf.expand_dims(h0_biases,axis=1))
h0_out = tf.nn.relu(h0_bout)
# h0_out is deliberately left in this transposed layout: the output layer
# consumes it as it is, so transposing back here would only be undone there.

#####################################################################################

In [25]:
# sanity check for layer h0: h0_out is kept in transposed layout, so it is
# transposed back before taking the largest absolute element-wise difference
tf.reduce_max(tf.abs(tf_h0_out-tf.transpose(h0_out)))

<tf.Tensor: shape=(), dtype=float32, numpy=0.0069750547>

In [26]:
# L8: DROPOUT LAYER (Disabled in Inference)
# L9: OUTPUT LAYER
## h0_out is already in the transposed layout this multiplication needs,
## so it is used directly as the input
op_in = h0_out
## transpose weight matrix so that each row belongs to one output unit
op_weights_tr = tf.transpose(op_weights, perm=[1,0])

## optionally permute the rows of the weight matrix
if shuffle_order_op is None:
    shuffled_weights = op_weights_tr
else:
    shuffled_weights = tf.gather(op_weights_tr, shuffle_order_op)

## multiply: plain matmul, or the error-injecting matmul when a profile is set
if error_profile_op is None:
    shuffled_mult_out = tf.linalg.matmul(shuffled_weights, op_in)
else:
    BLOCK_HEIGHT = NO_OF_CLASSES # block height for this layer is the number of classes
    BLOCK_WIDTH = 32 # totcols is always (going to be) a multiple of BLOCK_WIDTH
    BATCH_BLOCK_SIZE = 32 # inference is always one image at a time.
    shuffled_mult_out = matmul_ERR(shuffled_weights,
                                   op_in,
                                   BLOCK_HEIGHT,
                                   BLOCK_WIDTH,
                                   BATCH_BLOCK_SIZE,
                                   ERR_PROFILE=error_profile_op,
                                   ERR_PARAM_TF=ERR_PARAM_TF)

## undo the row permutation, if one was applied
if shuffle_order_op is None:
    op_mult_out = shuffled_mult_out
else:
    # scatter[shuffle_order[i]] = i, i.e. the inverse permutation
    indices = tf.expand_dims(shuffle_order_op, axis=1)
    updates = tf.range(tf.size(indices))
    shape = shuffle_order_op.shape
    scatter = tf.scatter_nd(indices, updates, shape)
    op_mult_out = tf.gather(shuffled_mult_out, scatter)

# Add bias (as a column, broadcast over the batch)
op_bout = tf.add(op_mult_out, tf.expand_dims(op_biases,axis=1))
# Softmax over axis 0, the output-unit axis in this transposed layout
op_out = tf.nn.softmax(op_bout, axis=0)
# Transpose to standard order
class_scores = tf.transpose(op_out, perm=[1,0])

In [27]:
# sanity check for layer op: largest absolute element-wise difference between
# the layer-by-layer Keras output and the manually computed class scores
tf.reduce_max(tf.abs(tf_op_out-class_scores))

<tf.Tensor: shape=(), dtype=float32, numpy=0.0015574396>

In [28]:
# end-to-end sanity check: a single forward pass through the whole Keras model
# compared against the manually computed class scores
tf.reduce_max(tf.abs(model(images)-class_scores))

<tf.Tensor: shape=(), dtype=float32, numpy=0.0015574396>

In [29]:
# Predicted class of each image = index of its highest class score
predictions = tf.math.argmax(class_scores, axis=1)
## number of images whose manual prediction differs from the true label
tf.math.count_nonzero(tf.math.not_equal(tf.cast(b_labels, dtype=tf.int64), predictions), dtype=tf.int64)

<tf.Tensor: shape=(), dtype=int64, numpy=6>

In [30]:
# Reference predictions: arg-max over the class scores of the unmodified Keras model
tf_predictions = tf.math.argmax(model(images), axis=1)

In [31]:
# Element-wise difference of predicted class indices; 0 means both paths agree for that image
tf_predictions - predictions 

<tf.Tensor: shape=(64,), dtype=int64, numpy=
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])>

In [32]:
# number of images the manual inference path misclassifies
tf.math.count_nonzero(tf.math.not_equal(tf.cast(b_labels, dtype=tf.int64), predictions), dtype=tf.int64)

<tf.Tensor: shape=(), dtype=int64, numpy=6>

In [33]:
# number of images the reference Keras model misclassifies
tf.math.count_nonzero(tf.math.not_equal(tf.cast(b_labels, dtype=tf.int64), tf_predictions), dtype=tf.int64)

<tf.Tensor: shape=(), dtype=int64, numpy=6>