# Implementation of Accurate Binary Convolution Layer
The main notebook is **ABC.ipynb**. In this notebook, *alphas* training is moved out of the layer, so that the variables and functions can be made reusable for inference time.

In [1]:
from __future__ import division, print_function
import tensorflow as tf
import numpy as np

#### See *ABC* notebook for explanation of all the functions

In [2]:
def get_mean_stddev(input_tensor):
    with tf.name_scope('mean_stddev_cal'):
        mean, variance = tf.nn.moments(input_tensor, axes=range(len(input_tensor.get_shape())))
        stddev = tf.sqrt(variance, name="standard_deviation")
        return mean, stddev
    
# TODO: Allow shift parameters to be learnable
def get_shifted_stddev(stddev, no_filters):
    with tf.name_scope('shifted_stddev'):
        spreaded_deviation = -1. + (2./(no_filters - 1)) * tf.convert_to_tensor(range(no_filters),
                                                                                dtype=tf.float32)
        return spreaded_deviation * stddev
    
def get_binary_filters(convolution_filters, no_filters, name=None):
    with tf.name_scope(name, default_name="get_binary_filters"):
        mean, stddev = get_mean_stddev(convolution_filters)
        shifted_stddev = get_shifted_stddev(stddev, no_filters)
        
        # Normalize the filters by subtracting mean from them
        mean_adjusted_filters = convolution_filters - mean
        
        # Tiling filters to match the number of filters
        expanded_filters = tf.expand_dims(mean_adjusted_filters, axis=0, name="expanded_filters")
        tiled_filters = tf.tile(expanded_filters, [no_filters] + [1] * len(convolution_filters.get_shape()),
                                name="tiled_filters")
        
        # Similarly tiling spreaded stddev to match the shape of tiled_filters
        expanded_stddev = tf.reshape(shifted_stddev, [no_filters] + [1] * len(convolution_filters.get_shape()),
                                     name="expanded_stddev")
        
        binarized_filters = tf.sign(tiled_filters + expanded_stddev, name="binarized_filters")
        return binarized_filters

Now, instead of get_alphas, implementation of **alpha training** is provided, which takes input of the *filters*, *binarized filters*, and *alphas* and returns the loss and the alpha training operation

In [3]:
def alpha_training(convolution_filters, binary_filters, alphas, no_filters):
    with tf.name_scope("alpha_training"):
        reshaped_convolution_filters = tf.reshape(convolution_filters, [-1], name="reshaped_convolution_filters")
        reshaped_binary_filters = tf.reshape(binary_filters, [no_filters, -1],
                                             name="reshaped_binary_filters")
        
        weighted_sum_filters = tf.reduce_sum(tf.multiply(alphas, reshaped_binary_filters),
                                             axis=0, name="weighted_sum_filters")
        
        # Defining loss
        error = tf.square(reshaped_convolution_filters - weighted_sum_filters, name="alphas_error")
        loss = tf.reduce_mean(error, axis=0, name="alphas_loss")
        
        # Defining optimizer
        training_op = tf.train.AdamOptimizer().minimize(loss, var_list=[alphas],
                                                        name="alphas_training_op")
        
        return training_op, loss

Now, both *ABC* and *ApproxConv* is updated to incorporate this change

In [4]:
def ApproxConv(no_filters, alphas, binary_filters, convolution_biases=None,
               strides=(1, 1), padding="VALID", name=None):
    with tf.name_scope(name, "ApproxConv"):
        if convolution_biases is None:
            biases = 0.
        else:
            biases = convolution_biases
        
        # Defining function for closure to accept multiple inputs with same filters
        def ApproxConvLayer(input_tensor, name=None):
            with tf.name_scope(name, "ApproxConv_Layer"):
                # Reshaping alphas to match the input tensor
                reshaped_alphas = tf.reshape(alphas,
                                             shape=[no_filters] + [1] * len(input_tensor.get_shape()),
                                             name="reshaped_alphas")
                
                # Calculating convolution for each binary filter
                approxConv_outputs = []
                for index in range(no_filters):
                    # Binary convolution
                    this_conv = tf.nn.conv2d(input_tensor, binary_filters[index],
                                             strides=(1,) + strides + (1,),
                                             padding=padding)
                    approxConv_outputs.append(this_conv + biases)
                conv_outputs = tf.convert_to_tensor(approxConv_outputs, dtype=tf.float32,
                                                    name="conv_outputs")
                
                # Summing up each of the binary convolution
                ApproxConv_output = tf.reduce_sum(tf.multiply(conv_outputs, reshaped_alphas), axis=0)
                
                return ApproxConv_output
        
        return ApproxConvLayer
    
def ABC(binary_filters, alphas, shift_parameters, betas, 
        convolution_biases=None, no_binary_filters=5, no_ApproxConvLayers=5,
        strides=(1, 1), padding="VALID", name=None):
    with tf.name_scope(name, "ABC"):        
        # Instantiating the ApproxConv Layer
        ApproxConvLayer= ApproxConv(no_binary_filters, alphas, binary_filters, convolution_biases,
                                    strides, padding)
        
        def ABCLayer(input_tensor, name=None):
            with tf.name_scope(name, "ABCLayer"):
                # Reshaping betas to match the input tensor
                reshaped_betas = tf.reshape(betas,
                                            shape=[no_ApproxConvLayers] + [1] * len(input_tensor.get_shape()),
                                            name="reshaped_betas")
                
                # Calculating ApproxConv for each shifted input
                ApproxConv_layers = []
                for index in range(no_ApproxConvLayers):
                    # Shifting and binarizing input
                    shifted_input = tf.clip_by_value(input_tensor + shift_parameters[index], 0., 1.,
                                                     name="shifted_input_" + str(index))
                    binarized_activation = tf.sign(shifted_input - 0.5)
                    
                    # Passing through the ApproxConv layer
                    ApproxConv_layers.append(ApproxConvLayer(binarized_activation))
                ApproxConv_output = tf.convert_to_tensor(ApproxConv_layers, dtype=tf.float32,
                                                         name="ApproxConv_output")
                
                # Taking the weighted sum using the betas
                ABC_output = tf.reduce_sum(tf.multiply(ApproxConv_output, reshaped_betas), axis=0)
                return ABC_output
        
        return ABCLayer

#### Now a layer can be created as follows

In [10]:
test_filters = np.random.normal(size=(3, 3, 1, 64))
test_biases = np.random.normal(size=(64,))
test_input = np.random.normal(size=(32, 28, 28, 1))

In [11]:
g = tf.Graph()

In [12]:
with g.as_default():
    filters = tf.Variable(tf.convert_to_tensor(test_filters, dtype=tf.float32), name="convolution_filters")
    biases = tf.Variable(tf.convert_to_tensor(test_biases, dtype=tf.float32), name="convolution_biases")
    alphas = tf.Variable(tf.constant(1., shape=(5, 1)), dtype=tf.float32,
                         name="alphas")
    shift_parameters = tf.Variable(tf.constant(0., shape=(5, 1)), dtype=tf.float32,
                                   name="shift_parameters")
    betas = tf.Variable(tf.constant(1., shape=(5, 1)), dtype=tf.float32,
                        name="betas")
    
    binary_filters = get_binary_filters(filters, 5)
    alphas_training_op, alphas_loss = alpha_training(tf.stop_gradient(filters),
                                                     tf.stop_gradient(binary_filters),
                                                     alphas, 5)
    ABC_layer = ABC(binary_filters, tf.stop_gradient(alphas), shift_parameters, betas, biases)
    
    output = ABC_layer(tf.convert_to_tensor(test_input, dtype=tf.float32))

### Testing
Let's test the updated architecture on MNIST again

In [5]:
# MNIST data import
# Importing data
from tensorflow.examples.tutorials.mnist import input_data
!mkdir -p /tmp/data
mnist = input_data.read_data_sets("/tmp/data/")

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


The following is exactly same as in the other notebook *ABC*

In [6]:
# Defining utils function
def weight_variable(shape, name="weight"):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name=name)

def bias_variable(shape, name="bias"):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial, name=name)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')

In [7]:
# Creating the graph
without_ABC_graph = tf.Graph()
with without_ABC_graph.as_default():
    # Defining inputs
    x = tf.placeholder(dtype=tf.float32)
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    
     # Convolution Layer 1
    W_conv1 = weight_variable(shape=([5, 5, 1, 32]), name="W_conv1")
    b_conv1 = bias_variable(shape=[32], name="b_conv1")
    conv1 = (conv2d(x_image, W_conv1) + b_conv1)
    pool1 = max_pool_2x2(conv1)
    bn_conv1 = tf.layers.batch_normalization(pool1, axis=-1, name="batchNorm1")
    h_conv1 = tf.nn.relu(bn_conv1)

    # Convolution Layer 2
    W_conv2 = weight_variable(shape=([5, 5, 32, 64]), name="W_conv2")
    b_conv2 = bias_variable(shape=[64], name="b_conv2")
    conv2 = (conv2d(h_conv1, W_conv2) + b_conv2)
    pool2 = max_pool_2x2(conv2)
    bn_conv2 = tf.layers.batch_normalization(pool2, axis=-1, name="batchNorm2")
    h_conv2 = tf.nn.relu(bn_conv2)

    # Flat the conv2 output
    h_conv2_flat = tf.reshape(h_conv2, shape=(-1, 7*7*64))

    # Dense layer1
    W_fc1 = weight_variable([7 * 7 * 64, 1024])
    b_fc1 = bias_variable([1024])
    h_fc1 = tf.nn.relu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

    # Dropout
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Output layer
    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])

    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    
    # Labels
    y = tf.placeholder(tf.int32, [None])
    y_ = tf.one_hot(y, 10)
    
    # Defining optimizer and loss
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    # Initializer
    graph_init = tf.global_variables_initializer()

In [8]:
# Defining variables to save. These will be fed to our custom layer
variables_to_save = {"W_conv1": W_conv1,
                     "b_conv1": b_conv1,
                     "W_conv2": W_conv2,
                     "b_conv2": b_conv2,
                     "W_fc1": W_fc1,
                     "b_fc1": b_fc1,
                     "W_fc2": W_fc2,
                     "b_fc2": b_fc2}
values = {}

In [9]:
n_epochs = 5
batch_size = 32
        
with tf.Session(graph=without_ABC_graph) as sess:
    sess.run(graph_init)
    for epoch in range(n_epochs):
        for iteration in range(1, 200 + 1):
            batch = mnist.train.next_batch(50)
            
            # Run operation and calculate loss
            _, loss_train = sess.run([train_step, cross_entropy],
                                     feed_dict={x: batch[0], y: batch[1], keep_prob: 0.5})
            print("\rIteration: {}/{} ({:.1f}%)  Loss: {:.5f}".format(
                      iteration, 200,
                      iteration * 100 / 200,
                      loss_train),
                  end="")

        # At the end of each epoch,
        # measure the validation loss and accuracy:
        loss_vals = []
        acc_vals = []
        for iteration in range(1, 200 + 1):
            X_batch, y_batch = mnist.validation.next_batch(batch_size)
            acc_val, loss_val = sess.run([accuracy, cross_entropy],
                                     feed_dict={x: batch[0], y: batch[1], keep_prob: 1.0})
            loss_vals.append(loss_val)
            acc_vals.append(acc_val)
            print("\rEvaluating the model: {}/{} ({:.1f}%)".format(iteration, 200,
                iteration * 100 / 200),
                  end=" " * 10)
        loss_val = np.mean(loss_vals)
        acc_val = np.mean(acc_vals)
        print("\rEpoch: {}  Val accuracy: {:.4f}%  Loss: {:.6f}".format(
            epoch + 1, acc_val * 100, loss_val))
        
    # On completion of training, save the variables to be fed to custom model
    for var_name in variables_to_save:
        values[var_name] = sess.run(variables_to_save[var_name])

Epoch: 1  Val accuracy: 80.0000%  Loss: 0.575571
Epoch: 2  Val accuracy: 88.0000%  Loss: 0.516295
Epoch: 3  Val accuracy: 98.0000%  Loss: 0.074902
Epoch: 4  Val accuracy: 96.0000%  Loss: 0.114960
Epoch: 5  Val accuracy: 96.0000%  Loss: 0.108748        


The 100% accuracy is not an error. It is due to the fact that complete validation set is not being evaluated only part of it is being evaluated and our model got all right answers in that part

#### Creating the custom model
While creating the custom model, we will need to create all the variables ourself.

First let's create a function that returns the required mean and variance for the batchnorm layer. Batchnorm layer requires that mean and variance be calculated of every layer except that of the channels layer

In [10]:
def bn_mean_variance(input_tensor, axis=-1, keep_dims=True):
    shape = len(input_tensor.get_shape())
    if axis < 0:
        axis += shape
    dimension_range = range(shape)
    return tf.nn.moments(input_tensor, axes=dimension_range[:axis] + dimension_range[axis+1:],
                         keep_dims=keep_dims)

In [13]:
custom_graph = tf.Graph()
with custom_graph.as_default():
    alphas_training_operations = []
    alphas_variables = []
    
    # Setting configuration
    no_filters_conv1 = 5
    no_layers_conv1 = 5
    no_filters_conv2 = 5
    no_layers_conv2 = 5
    
    # Inputs
    x = tf.placeholder(dtype=tf.float32)
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    
    # Convolution Layer 1
    W_conv1 = tf.Variable(values["W_conv1"], name="W_conv1")
    b_conv1 = tf.Variable(values["b_conv1"], name="b_conv1")
    # Creating new variables
    alphas_conv1 = tf.Variable(tf.random_normal(shape=(no_filters_conv1, 1), mean=1.0, stddev=0.1),
                               dtype=tf.float32, name="alphas_conv1")
    shift_parameters_conv1 = tf.Variable(tf.constant(0., shape=(no_layers_conv1, 1)),
                                         dtype=tf.float32, name="shift_parameters_conv1")
    betas_conv1 = tf.Variable(tf.constant(1., shape=(no_layers_conv1, 1)),
                              dtype=tf.float32, name="betas_conv1")
    # Performing the operations
    binary_filters_conv1 = get_binary_filters(W_conv1, no_filters_conv1)
    alpha_training_conv1, alpha_loss_conv1 = alpha_training(tf.stop_gradient(W_conv1, "no_gradient_W_conv1"),
                                                            tf.stop_gradient(binary_filters_conv1,
                                                                             "no_gradient_binary_filters_conv1"),
                                                            alphas_conv1, no_filters_conv1)
    conv1 = ABC(binary_filters_conv1, tf.stop_gradient(alphas_conv1), shift_parameters_conv1,
                betas_conv1, b_conv1, padding="SAME")(x_image)
    # Saving the alphas training operation and the variable
    alphas_training_operations.append(alpha_training_conv1)
    alphas_variables.append(alphas_conv1)
    
    # Other layers
    pool1 = max_pool_2x2(conv1)
    # BatchNorm 
    mean_conv1, variance_conv1 = bn_mean_variance(pool1)
    bn_gamma_conv1 = tf.Variable(tf.ones(shape=(32,), dtype=tf.float32), name="bn_gamma_conv1")
    bn_beta_conv1 = tf.Variable(tf.zeros(shape=(32,), dtype=tf.float32), name="bn_beta_conv1")
    bn_conv1 = tf.nn.batch_normalization(pool1, mean_conv1, variance_conv1,
                                         bn_beta_conv1, bn_gamma_conv1, 0.001)
    h_conv1 = tf.nn.relu(bn_conv1)

    # Convolution Layer 2
    W_conv2 = tf.Variable(values["W_conv2"], name="W_conv2")
    b_conv2 = tf.Variable(values["b_conv2"], name="b_conv2")
    
    # Creating new variables
    alphas_conv2 = tf.Variable(tf.random_normal(shape=(no_filters_conv2, 1), mean=1.0, stddev=0.1),
                               dtype=tf.float32, name="alphas_conv2")
    shift_parameters_conv2 = tf.Variable(tf.constant(0., shape=(no_layers_conv2, 1)),
                                         dtype=tf.float32, name="shift_parameters_conv2")
    betas_conv2 = tf.Variable(tf.constant(1., shape=(no_layers_conv2, 1)),
                              dtype=tf.float32, name="betas_conv2")
    
    # Performing the operations
    binary_filters_conv2 = get_binary_filters(W_conv2, no_filters_conv2)
    alpha_training_conv2, alpha_loss_conv2 = alpha_training(tf.stop_gradient(W_conv2, "no_gradient_W_conv2"),
                                                            tf.stop_gradient(binary_filters_conv2,
                                                                             "no_gradient_binary_filters_conv2"),
                                                            alphas_conv2, no_filters_conv2)
    conv2 = ABC(binary_filters_conv2, tf.stop_gradient(alphas_conv2), shift_parameters_conv2,
                betas_conv2, b_conv2, padding="SAME")(h_conv1)
    
    # Saving the alphas training operation and the variable
    alphas_training_operations.append(alpha_training_conv2)
    alphas_variables.append(alphas_conv2)
    
    # Other layers
    pool2 = max_pool_2x2(conv2)
    # BatchNorm
    mean_conv2, variance_conv2 = bn_mean_variance(pool2)
    bn_gamma_conv2 = tf.Variable(tf.ones(shape=(64,), dtype=tf.float32), name="bn_gamma_conv2")
    bn_beta_conv2 = tf.Variable(tf.zeros(shape=(64,), dtype=tf.float32), name="bn_beta_conv2")
    bn_conv2 = tf.nn.batch_normalization(pool2, mean_conv2, variance_conv2,
                                         bn_beta_conv2, bn_gamma_conv2, 0.001)
    h_conv2 = tf.nn.relu(bn_conv2)

    # Flat the conv2 output
    h_conv2_flat = tf.reshape(h_conv2, shape=(-1, 7*7*64))

    # Dense layer1
    W_fc1 = tf.convert_to_tensor(values["W_fc1"], dtype=tf.float32)
    b_fc1 = tf.convert_to_tensor(values["b_fc1"], dtype=tf.float32)
    h_fc1 = tf.nn.relu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

    # Dropout
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Output layer
    W_fc2 = tf.convert_to_tensor(values["W_fc2"], dtype=tf.float32)
    b_fc2 = tf.convert_to_tensor(values["b_fc2"], dtype=tf.float32)
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    
    # Labels
    y = tf.placeholder(tf.int32, [None])
    y_ = tf.one_hot(y, 10)
    
    # Defining optimizer and loss
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    graph_init = tf.global_variables_initializer()
    alphas_init = tf.variables_initializer(alphas_variables)

Let's create the dictionary of variables to save

In [14]:
# Defining variables to save. These will be fed to our custom layer
variables_to_save = {"W_conv1": W_conv1,
                     "b_conv1": b_conv1,
                     "alphas_conv1": alphas_conv1,
                     "betas_conv1": betas_conv1,
                     "shift_parameters_conv1": shift_parameters_conv1,
                     "bn_gamma_conv1": bn_gamma_conv1,
                     "bn_beta_conv1": bn_beta_conv1,
                     "W_conv2": W_conv2,
                     "b_conv2": b_conv2,
                     "alphas_conv2": alphas_conv2,
                     "betas_conv2": betas_conv2,
                     "shift_parameters_conv2": shift_parameters_conv2,
                     "bn_gamma_conv2": bn_gamma_conv2,
                     "bn_beta_conv2": bn_beta_conv2,
                     "W_fc1": W_fc1,
                     "b_fc1": b_fc1,
                     "W_fc2": W_fc2,
                     "b_fc2": b_fc2}
values = {}

In [15]:
%%time
n_epochs = 5
batch_size = 32
alpha_training_epochs = 200
        
with tf.Session(graph=custom_graph) as sess:
    sess.run(graph_init)
    for epoch in range(n_epochs):
        for iteration in range(1, 200 + 1):
            # Training alphas
            sess.run(alphas_init)
            for alpha_training_op in alphas_training_operations:
                for alpha_epoch in range(alpha_training_epochs):
                    sess.run(alpha_training_op)
            
            batch = mnist.train.next_batch(50)
            
            # Run operation and calculate loss
            _, loss_train = sess.run([train_step, cross_entropy],
                                     feed_dict={x: batch[0], y: batch[1], keep_prob: 0.5})
            print("\rIteration: {}/{} ({:.1f}%)  Loss: {:.5f}".format(
                      iteration, 200,
                      iteration * 100 / 200,
                      loss_train),
                  end="")

        # At the end of each epoch,
        # measure the validation loss and accuracy:
        
        # Training alphas
        sess.run(alphas_init)
        for alpha_training_op in alphas_training_operations:
            for alpha_epoch in range(alpha_training_epochs):
                sess.run(alpha_training_op)
                    
        loss_vals = []
        acc_vals = []
        for iteration in range(1, 200 + 1):            
            X_batch, y_batch = mnist.validation.next_batch(batch_size)
            acc_val, loss_val = sess.run([accuracy, cross_entropy],
                                     feed_dict={x: batch[0], y: batch[1], keep_prob: 1.0})
            loss_vals.append(loss_val)
            acc_vals.append(acc_val)
            print("\rEvaluating the model: {}/{} ({:.1f}%)".format(iteration, 200,
                iteration * 100 / 200),
                  end=" " * 10)
        loss_val = np.mean(loss_vals)
        acc_val = np.mean(acc_vals)
        print("\rEpoch: {}  Val accuracy: {:.4f}%  Loss: {:.6f}".format(
            epoch + 1, acc_val * 100, loss_val))
        
    # On completion of training, save the variables to be fed to custom model
    for var_name in variables_to_save:
        values[var_name] = sess.run(variables_to_save[var_name])

Epoch: 1  Val accuracy: 90.0000%  Loss: 0.314954
Epoch: 2  Val accuracy: 76.0000%  Loss: 0.954873
Epoch: 3  Val accuracy: 80.0000%  Loss: 0.985948
Epoch: 4  Val accuracy: 84.0000%  Loss: 1.012544
Epoch: 5  Val accuracy: 78.0000%  Loss: 1.004487
CPU times: user 4min 42s, sys: 26.4 s, total: 5min 8s
Wall time: 5min 6s


Now, only the required variables can be saved for inference time. Using the **W_conv1** and **W_conv2**, values for binary filters and alphas can be calculated and those can be used along with **shift_parameters** and **betas** to create ABC layer for inference

### Pure inference testing
OK! Let's extract the binary filters and alphas and throw away the weights and test our network. This will ensure that we do not have any bug in the implementation of the ABC layer

Creating graphs for alphas calculation

In [22]:
alpha1_cal_graph = tf.Graph()
with alpha1_cal_graph.as_default():
    alphas1 = tf.Variable(tf.random_normal(shape=(no_filters_conv1, 1), mean=1.0, stddev=0.1))
    conv_filters1 = tf.placeholder(dtype=tf.float32, shape=(5, 5, 1, 32))
    bin_filters1 = get_binary_filters(convolution_filters=conv_filters1,
                                     no_filters=no_filters_conv1)
    alpha_training_op1, alpha_training_loss1 = alpha_training(conv_filters1, bin_filters1,
                                                            alphas1, no_filters_conv1)
    al_init1 = tf.global_variables_initializer()
    
alpha2_cal_graph = tf.Graph()
with alpha2_cal_graph.as_default():
    alphas2 = tf.Variable(tf.random_normal(shape=(no_filters_conv1, 1), mean=1.0, stddev=0.1))
    conv_filters2 = tf.placeholder(dtype=tf.float32, shape=(5, 5, 32, 64))
    bin_filters2 = get_binary_filters(convolution_filters=conv_filters2,
                                     no_filters=no_filters_conv2)
    alpha_training_op2, alpha_training_loss2 = alpha_training(conv_filters2, bin_filters2,
                                                            alphas2, no_filters_conv2)
    al_init2 = tf.global_variables_initializer()

Calculating alphas and binary filters

In [23]:
with tf.Session(graph=alpha1_cal_graph) as sess:
    al_init1.run()
    for epoch in range(200):
        sess.run(alpha_training_op1, feed_dict={conv_filters1: values["W_conv1"]})
    cal_bin_filters, cal_alphas = sess.run([bin_filters1, alphas1], feed_dict={conv_filters1: values["W_conv1"]})
    values["binary_filters_conv1"] = cal_bin_filters
    values["alphas_conv1"] = cal_alphas

with tf.Session(graph=alpha2_cal_graph) as sess:
    al_init2.run()
    for epoch in range(200):
        sess.run(alpha_training_op2, feed_dict={conv_filters2: values["W_conv2"]})
    cal_bin_filters, cal_alphas = sess.run([bin_filters2, alphas2], feed_dict={conv_filters2: values["W_conv2"]})
    values["binary_filters_conv2"] = cal_bin_filters
    values["alphas_conv2"] = cal_alphas

#### Building inference model
Now, we have all our variables, let's build an inference model

In [25]:
inference_graph = tf.Graph()
with inference_graph.as_default():
    # Setting configuration
    no_filters_conv1 = 5
    no_layers_conv1 = 5
    no_filters_conv2 = 5
    no_layers_conv2 = 5
    
    # Inputs
    x = tf.placeholder(dtype=tf.float32)
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    
    # Convolution Layer 1
    b_conv1 = tf.convert_to_tensor(values["b_conv1"], dtype=tf.float32, name="b_conv1")
    alphas_conv1 = tf.convert_to_tensor(values["alphas_conv1"],
                                        dtype=tf.float32, name="alphas_conv1")
    shift_parameters_conv1 = tf.convert_to_tensor(values["shift_parameters_conv1"],
                                                  dtype=tf.float32, name="shift_parameters_conv1")
    betas_conv1 = tf.convert_to_tensor(values["betas_conv1"],
                                       dtype=tf.float32, name="betas_conv1")
    # Performing the operations
    binary_filters_conv1 = tf.convert_to_tensor(values["binary_filters_conv1"], dtype=tf.float32,
                                                name="binary_filters_conv1")
    conv1 = ABC(binary_filters_conv1, tf.stop_gradient(alphas_conv1), shift_parameters_conv1,
                betas_conv1, b_conv1, padding="SAME")(x_image)
    # Other layers
    pool1 = max_pool_2x2(conv1)
    # batch norm parameters
    mean_conv1, variance_conv1 = bn_mean_variance(pool1)
    bn_gamma_conv1 = tf.convert_to_tensor(values["bn_gamma_conv1"], dtype=tf.float32,
                                          name="bn_gamma_conv1")
    bn_beta_conv1 = tf.convert_to_tensor(values["bn_beta_conv1"], dtype=tf.float32,
                                         name="bn_beta_conv1")
    bn_conv1 = tf.nn.batch_normalization(pool1, mean_conv1, variance_conv1,
                                         bn_beta_conv1, bn_gamma_conv1, 0.001)
    h_conv1 = tf.nn.relu(bn_conv1)

    # Convolution Layer 2
    b_conv2 = tf.convert_to_tensor(values["b_conv2"], dtype=tf.float32, name="b_conv2")
    alphas_conv2 = tf.convert_to_tensor(values["alphas_conv2"],
                                        dtype=tf.float32, name="alphas_conv2")
    shift_parameters_conv2 = tf.convert_to_tensor(values["shift_parameters_conv2"],
                                                  dtype=tf.float32, name="shift_parameters_conv2")
    betas_conv2 = tf.convert_to_tensor(values["betas_conv2"],
                                       dtype=tf.float32, name="betas_conv2")
    # Performing the operations
    binary_filters_conv2 = tf.convert_to_tensor(values["binary_filters_conv2"], dtype=tf.float32,
                                                name="binary_filters_conv2")
    conv2 = ABC(binary_filters_conv2, tf.stop_gradient(alphas_conv2), shift_parameters_conv2,
                betas_conv2, b_conv2, padding="SAME")(h_conv1)
    # Other layers
    pool2 = max_pool_2x2(conv2)
    # batch norm parameters
    mean_conv2, variance_conv2 = bn_mean_variance(pool2)
    bn_gamma_conv2 = tf.convert_to_tensor(values["bn_gamma_conv2"], dtype=tf.float32,
                                          name="bn_gamma_conv2")
    bn_beta_conv2 = tf.convert_to_tensor(values["bn_beta_conv2"], dtype=tf.float32,
                                         name="bn_beta_conv2")
    bn_conv2 = tf.nn.batch_normalization(pool2, mean_conv2, variance_conv2,
                                         bn_beta_conv2, bn_gamma_conv2, 0.001)
    h_conv2 = tf.nn.relu(bn_conv2)

    # Flat the conv2 output
    h_conv2_flat = tf.reshape(h_conv2, shape=(-1, 7*7*64))

    # Dense layer1
    W_fc1 = tf.convert_to_tensor(values["W_fc1"], dtype=tf.float32)
    b_fc1 = tf.convert_to_tensor(values["b_fc1"], dtype=tf.float32)
    h_fc1 = tf.nn.relu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

    # Dropout
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Output layer
    W_fc2 = tf.convert_to_tensor(values["W_fc2"], dtype=tf.float32)
    b_fc2 = tf.convert_to_tensor(values["b_fc2"], dtype=tf.float32)
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    
    # Labels
    y = tf.placeholder(tf.int32, [None])
    y_ = tf.one_hot(y, 10)
    
    # Defining optimizer and loss
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

Let's test the inference model

In [26]:
%%time
with tf.Session(graph=inference_graph) as sess:
    loss_vals = []
    acc_vals = []
    for iteration in range(1, 500 + 1):            
        X_batch, y_batch = mnist.validation.next_batch(batch_size)
        acc_val, loss_val = sess.run([accuracy, cross_entropy],
                                 feed_dict={x: batch[0], y: batch[1], keep_prob: 1.0})
        loss_vals.append(loss_val)
        acc_vals.append(acc_val)
        print("\rEvaluating the model: {}/{} ({:.1f}%)".format(iteration, 500,
            iteration * 100 / 500),
              end=" " * 10)
    loss_val = np.mean(loss_vals)
    acc_val = np.mean(acc_vals)
    print("\rEpoch: {}  Val accuracy: {:.4f}%  Loss: {:.6f}".format(
        epoch + 1, acc_val * 100, loss_val))

Epoch: 200  Val accuracy: 78.0000%  Loss: 0.884985
CPU times: user 6.03 s, sys: 832 ms, total: 6.86 s
Wall time: 5.95 s
