# Implementation of Accurate Binary Convolution Layer
The main notebook is **ABC.ipynb**. In this notebook, *alphas* training is moved out of the layer, so that the variables and functions can be made reusable for inference time.

In [179]:
from __future__ import division, print_function
import tensorflow as tf
import numpy as np
from keras.datasets import cifar10,mnist

#### See *ABC* notebook for explanation of all the functions

In [180]:
def get_mean_stddev(input_tensor):
    with tf.name_scope('mean_stddev_cal'):
        mean, variance = tf.nn.moments(input_tensor, axes=range(len(input_tensor.get_shape())))
        stddev = tf.sqrt(variance, name="standard_deviation")
        return mean, stddev
    
# TODO: Allow shift parameters to be learnable
def get_shifted_stddev(stddev, no_filters):
    with tf.name_scope('shifted_stddev'):
        spreaded_deviation = -1. + (2./(no_filters - 1)) * tf.convert_to_tensor(range(no_filters),
                                                                                dtype=tf.float32)
        return spreaded_deviation * stddev
    
def get_binary_filters(convolution_filters, no_filters, name=None):
    with tf.name_scope(name, default_name="get_binary_filters"):
        mean, stddev = get_mean_stddev(convolution_filters)
        shifted_stddev = get_shifted_stddev(stddev, no_filters)
        
        # Normalize the filters by subtracting mean from them
        mean_adjusted_filters = convolution_filters - mean
        
        # Tiling filters to match the number of filters
        expanded_filters = tf.expand_dims(mean_adjusted_filters, axis=0, name="expanded_filters")
        tiled_filters = tf.tile(expanded_filters, [no_filters] + [1] * len(convolution_filters.get_shape()),
                                name="tiled_filters")
        
        # Similarly tiling spreaded stddev to match the shape of tiled_filters
        expanded_stddev = tf.reshape(shifted_stddev, [no_filters] + [1] * len(convolution_filters.get_shape()),
                                     name="expanded_stddev")
        
        binarized_filters = tf.sign(tiled_filters + expanded_stddev, name="binarized_filters")
        return binarized_filters

Now, instead of get_alphas, implementation of **alpha training** is provided, which takes input of the *filters*, *binarized filters*, and *alphas* and returns the loss and the alpha training operation

In [181]:
def alpha_training(convolution_filters, binary_filters, alphas, no_filters):
    with tf.name_scope("alpha_training"):
        reshaped_convolution_filters = tf.reshape(convolution_filters, [-1], name="reshaped_convolution_filters")
        reshaped_binary_filters = tf.reshape(binary_filters, [no_filters, -1],
                                             name="reshaped_binary_filters")
        
        weighted_sum_filters = tf.reduce_sum(tf.multiply(alphas, reshaped_binary_filters),
                                             axis=0, name="weighted_sum_filters")
        
        # Defining loss
        error = tf.square(reshaped_convolution_filters - weighted_sum_filters, name="alphas_error")
        loss = tf.reduce_mean(error, axis=0, name="alphas_loss")
        
        # Defining optimizer
        training_op = tf.train.AdamOptimizer().minimize(loss, var_list=[alphas],
                                                        name="alphas_training_op")
        
        return training_op, loss

Now, both *ABC* and *ApproxConv* is updated to incorporate this change

In [182]:
def ApproxConv(no_filters, alphas, binary_filters, convolution_biases=None,
               strides=(1, 1), padding="VALID", name=None):
    with tf.name_scope(name, "ApproxConv"):
        if convolution_biases is None:
            biases = 0.
        else:
            biases = convolution_biases
        
        # Defining function for closure to accept multiple inputs with same filters
        def ApproxConvLayer(input_tensor, name=None):
            with tf.name_scope(name, "ApproxConv_Layer"):
                # Reshaping alphas to match the input tensor
                reshaped_alphas = tf.reshape(alphas,
                                             shape=[no_filters] + [1] * len(input_tensor.get_shape()),
                                             name="reshaped_alphas")
                
                # Calculating convolution for each binary filter
                approxConv_outputs = []
                for index in range(no_filters):
                    # Binary convolution
                    this_conv = tf.nn.conv2d(input_tensor, binary_filters[index],
                                             strides=(1,) + strides + (1,),
                                             padding=padding)
                    approxConv_outputs.append(this_conv + biases)
                conv_outputs = tf.convert_to_tensor(approxConv_outputs, dtype=tf.float32,
                                                    name="conv_outputs")
                
                # Summing up each of the binary convolution
                ApproxConv_output = tf.reduce_sum(tf.multiply(conv_outputs, reshaped_alphas), axis=0)
                
                return ApproxConv_output
        
        return ApproxConvLayer
    
def ABC(binary_filters, alphas, shift_parameters, betas, 
        convolution_biases=None, no_binary_filters=2, no_ApproxConvLayers=1,
        strides=(1, 1), padding="VALID", name=None):
    with tf.name_scope(name, "ABC"):        
        # Instantiating the ApproxConv Layer
        ApproxConvLayer= ApproxConv(no_binary_filters, alphas, binary_filters, convolution_biases,
                                    strides, padding)
        
        def ABCLayer(input_tensor, name=None):
            with tf.name_scope(name, "ABCLayer"):
                # Reshaping betas to match the input tensor
                reshaped_betas = tf.reshape(betas,
                                            shape=[no_ApproxConvLayers] + [1] * len(input_tensor.get_shape()),
                                            name="reshaped_betas")
                
                # Calculating ApproxConv for each shifted input
                ApproxConv_layers = []
                for index in range(no_ApproxConvLayers):
                    # Shifting and binarizing input
                    shifted_input = tf.clip_by_value(input_tensor + shift_parameters[index], 0., 1.,
                                                     name="shifted_input_" + str(index))
                    binarized_activation = tf.sign(shifted_input - 0.5)
                    
                    # Passing through the ApproxConv layer
                    ApproxConv_layers.append(ApproxConvLayer(binarized_activation))
                ApproxConv_output = tf.convert_to_tensor(ApproxConv_layers, dtype=tf.float32,
                                                         name="ApproxConv_output")
                
                # Taking the weighted sum using the betas
                ABC_output = tf.reduce_sum(tf.multiply(ApproxConv_output, reshaped_betas), axis=0)
                return ABC_output
        
        return ABCLayer

#### Now a layer can be created as follows

In [164]:
test_filters = np.random.normal(size=(3, 3, 1, 64))
test_biases = np.random.normal(size=(64,))
test_input = np.random.normal(size=(32, 28, 28, 1))

In [165]:
g = tf.Graph()

In [166]:
with g.as_default():
    filters = tf.Variable(tf.convert_to_tensor(test_filters, dtype=tf.float32), name="convolution_filters")
    biases = tf.Variable(tf.convert_to_tensor(test_biases, dtype=tf.float32), name="convolution_biases")
    alphas = tf.Variable(tf.constant(1., shape=(5, 1)), dtype=tf.float32,
                         name="alphas")
    shift_parameters = tf.Variable(tf.constant(0., shape=(5, 1)), dtype=tf.float32,
                                   name="shift_parameters")
    betas = tf.Variable(tf.constant(1., shape=(5, 1)), dtype=tf.float32,
                        name="betas")
    
    binary_filters = get_binary_filters(filters, 5)
    alphas_training_op, alphas_loss = alpha_training(tf.stop_gradient(filters),
                                                     tf.stop_gradient(binary_filters),
                                                     alphas, 5)
    ABC_layer = ABC(binary_filters, tf.stop_gradient(alphas), shift_parameters, betas, biases)
    
    output = ABC_layer(tf.convert_to_tensor(test_input, dtype=tf.float32))

ValueError: Cannot reshape a tensor with 5 elements to shape [1,1,1,1,1] (1 elements) for 'ABCLayer/reshaped_betas' (op: 'Reshape') with input shapes: [5,1], [5] and with input tensors computed as partial shapes: input[1] = [1,1,1,1,1].

### Testing
Let's test the updated architecture on cifar again

In [194]:
def next_experiment_dir(top_dir):
    """We need directory with consecutive subdirectories to store results of consecutive trainings. """
    dirs = [int(dirname) for dirname in os.listdir(top_dir) if os.path.isdir(os.path.join(top_dir, dirname))]
    if len(dirs) > 0:
        return os.path.join(top_dir, str(max(dirs) + 1))
    else:
        return os.path.join(top_dir, '1')


def prepare_input(data, mu=None, sigma=None):
    """
    Normalizes pixels across dataset. For training set, calculate mu and sigma. For test set, transfer these
    from training set.

    :param data: dataset
    :param mu: mean pixel value across dataset. Calculated if not provided.
    :param sigma: standard deviation of pixel value across dataset. Calculated if not provided.
    :return: normalized dataset, mean and standard deviation
    """
    if mu is None:
        mu = np.mean(data)
    if sigma is None:
        sigma = np.std(data)
    data = data - mu
    data = data / sigma
    return data, mu, sigma

image_size = 32
num_labels = 10
num_channels = 3 # RGB

(x_train, y_train), (x_test, y_test) = cifar10.load_data()

#(x_train, y_train), (x_test, y_test) = cifar10.load_data()
 
x_train, mu_train, sigma_train = prepare_input(x_train)
x_test, _, _ = prepare_input(x_test, mu_train, sigma_train)

#this part is needed to reshape mnist. Cifar doesn't matter. just comment it out if cifar
#x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
#x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
#y_train = y_train.reshape(y_train.shape[0], 1)
#y_test = y_test.reshape(y_test.shape[0], 1)

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)


(60000, 28, 28, 1)
(60000, 1)
(10000, 28, 28, 1)
(10000, 1)


In [195]:
################################
import cv2
import random
from keras.utils.np_utils import to_categorical
import uuid

#initial_epoch = 0
#nb_epoch = 10
#batch_size = 64
#validation_split = 0.2 
#input_shape = (67, 67, 3)

#nb_classes = 10


def load_image(img):
    # Load image with 3 channel colors
    # img = cv2.imread(img_path, flags=1)
    name = str(uuid.uuid4())

    # Convert to rgb
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    # cv2.imwrite('/tmp/test/%s.jpg' % name, img)
    # Image needs to the resized to (227x227), but we want to maintain the aspect ratio.
    # height = img.shape[0]
    # width = img.shape[1]
    # offset = int(round(max(height, width) / 2.0))

    # # Add borders to the images.
    # padded_img = cv2.copyMakeBorder(img, offset, offset, offset, offset, cv2.BORDER_CONSTANT)
    # padded_height = padded_img.shape[0]
    # padded_width = padded_img.shape[1]
    # center_x = int(round(padded_width / 2.0))
    # center_y = int(round(padded_height / 2.0))
    # # Crop the square containing the full image.
    # cropped_img = padded_img[center_y - offset: center_y + offset, center_x - offset: center_x + offset]

    # Resize image to 227, 227 as Squeezenet only accepts this format.
    resized_image = cv2.resize(img, (input_shape[0], input_shape[1])).astype('float32')
    return resized_image

#(X_train, y_train), (X_test, y_test) = mnist.load_data()
## Shuffle lists
#train_zipped = zip(X_train, y_train)
#test_zipped = zip(X_test, y_test)


#random.shuffle(train_zipped)
#random.shuffle(train_zipped)
#random.shuffle(train_zipped)
#random.shuffle(test_zipped)
#random.shuffle(test_zipped)
#random.shuffle(test_zipped)

#X_train[:], y_train[:] = zip(*train_zipped)
#X_test[:], y_test[:] = zip(*test_zipped)

#X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
#X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)

#X_train = X_train.astype('float32')
#X_test = X_test.astype('float32')
#X_train /= 255
#X_test /= 255
#Y_train = to_categorical(y_train, nb_classes)
#Y_test = to_categorical(y_test, nb_classes)

def gen(x, y, nb=50): 
    for i in xrange(len(x)):
        j = random.randint(i, len(x)-2000)
        newy = y[j: j+nb]
        newx = x[j:j+nb]
        #newx = np.asarray([load_image(img) for img in x[j:j+nb]])
        return (newx,newy)
        #yield (np.asarray([load_image(img) for img in x[j:j+nb]]),y[j: j+nb])
        # yield (np.asarray([load_image(img) for img in x[j*nb:j*nb+nb]]), y[j*nb:j*nb+nb])

# the data, shuffled and split between train and test sets
# train, test = mnist.load_data()
# train = zip(*train)
# test = zip(*test)

#samples_per_epoch = 3000 #len(training_images) - 20
#nb_val_samples = 300 # len(validation_images) - 20

# Generator expression. Yields two tuples (image, class). Use generator because images might not fit into memory,
# training_data = ( (load_image(x), to_categorical([y], nb_classes=nb_classes)) for x, y in train )
# validation_data = ( (load_image(x), to_categorical([y], nb_classes=nb_classes)) for x, y in test )
# training_data = gen(X_train, Y_train)
# validation_data = gen(X_test, Y_test)

#x_train,y_train = gen(x_train, y_train, nb=6000) #change this to get bigger dataset
#x_test,y_test = gen(x_test, y_test, nb=1000) #change this to get more validation set


#print (X_train.shape)
#print (Y_test.shape)

num_images_train = x_train.shape[0]
num_images_valid = x_test.shape[0]

print (x_train.shape)
print (y_train.shape)
print (x_test.shape)
print (y_test.shape)
##############

(60000, 28, 28, 1)
(60000, 1)
(10000, 28, 28, 1)
(10000, 1)


The following is exactly same as in the other notebook *ABC*

In [196]:
# Defining utils function
def weight_variable(shape, name="weight"):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name=name)

def bias_variable(shape, name="bias"):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial, name=name)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')


In [197]:

def squeeze(input, channels, layer_num):
    """
    Defines squeezed block for fire module.

    :param input: input tensor
    :param channels: number of output channels
    :param layer_num: layer number for naming purposes
    :return: output tensor convoluted with squeeze layer
    """
    layer_name = 'squeeze_' + str(layer_num)
    input_channels = input.get_shape().as_list()[3]

    with tf.name_scope(layer_name):
        weights = tf.Variable(tf.contrib.layers.xavier_initializer()([1, 1, input_channels, channels]))
        biases = tf.Variable(tf.zeros([1, 1, 1, channels]), name='biases')
        onebyone = tf.nn.conv2d(input, weights, strides=(1, 1, 1, 1), padding='VALID') + biases
        A = tf.nn.relu(onebyone)

        tf.summary.histogram('weights', weights)
        tf.summary.histogram('biases', biases)
        tf.summary.histogram('logits', onebyone)
        tf.summary.histogram('activations', A)

    return A

# define expand module
def expand(input, channels_1by1, channels_3by3, layer_num):
    """
    Defines expand block for fire module.
    :param input: input tensor
    :param channels_1by1: number of output channels in 1x1 layers
    :param channels_3by3: number of output channels in 3x3 layers
    :param layer_num: layer number for naming purposes
    :return: output tensor convoluted with expand layer
    """

    layer_name = 'expand_' + str(layer_num)
    input_channels = input.get_shape().as_list()[3]

    with tf.name_scope(layer_name):
        weights1x1 = tf.Variable(tf.contrib.layers.xavier_initializer()([1, 1, input_channels, channels_1by1]))
        biases1x1 = tf.Variable(tf.zeros([1, 1, 1, channels_1by1]), name='biases')
        onebyone = tf.nn.conv2d(input, weights1x1, strides=(1, 1, 1, 1), padding='VALID') + biases1x1
        A_1x1 = tf.nn.relu(onebyone)

        tf.summary.histogram('weights_1x1', weights1x1)
        tf.summary.histogram('biases_1x1', biases1x1)
        tf.summary.histogram('logits_1x1', onebyone)
        tf.summary.histogram('activations_1x1', A_1x1)

        weights3x3 = tf.Variable(tf.contrib.layers.xavier_initializer()([1, 1, input_channels, channels_3by3]))
        biases3x3 = tf.Variable(tf.zeros([1, 1, 1, channels_3by3]), name='biases')
        threebythree = tf.nn.conv2d(input, weights3x3, strides=(1, 1, 1, 1), padding='SAME') + biases3x3
        A_3x3 = tf.nn.relu(threebythree)

        tf.summary.histogram('weights_3x3', weights3x3)
        tf.summary.histogram('biases_3x3', biases3x3)
        tf.summary.histogram('logits_3x3', threebythree)
        tf.summary.histogram('activations_3x3', A_3x3)

    return tf.concat([A_1x1, A_3x3], axis=3)


# define fire module
def fire_module(input, squeeze_channels, expand_channels_1by1, expand_channels_3by3, layer_num):
    """
    Train fire module. Fire module does not change input height and width, only depth.
    :param input: input tensor
    :param squeeze_channels: number of channels for 1x1 squeeze layer
    :param expand_channels_1by1: number of channels for 1x1 expand layer
    :param expand_channels_3by3: number of channels for 3x3 expand layer
    :param layer_num: number of layer for naming purposes only
    :return: a tensor of shape [input_height x input_width x expand_channels_1by1 * expand_channels_3by3]
    """
    with tf.name_scope('fire_' + str(layer_num)):
        squeeze_output = squeeze(input, squeeze_channels, layer_num)
        return expand(squeeze_output, expand_channels_1by1, expand_channels_3by3, layer_num)

In [198]:
#create graph

#filters = [64,64,128,128,192,192,256,256]
#squeezes= [16,16, 32, 32, 48, 48, 64, 64]

#filters = [8,8,16,16,24,24,32,32]
#squeezes= [2,2,4, 4, 6, 6, 8, 8]
without_ABC_graph = tf.Graph()
with without_ABC_graph.as_default():
    # define placeholders
        pooling_size=(1, 2, 2, 1)
        x = tf.placeholder(tf.float32,
                                     shape=[None, 28, 28, 1],
                                     name='x')
        x_image = x

        keep_prob = tf.placeholder(tf.float32)
        y = tf.placeholder(tf.int32, [None,1],name='y')
        in_training = tf.placeholder(tf.bool, shape=())
        learning_rate = tf.placeholder(tf.float32, shape=())

        tf.summary.image('x', x)
    # define structure of the net
    # layer 1 - conv 1
        with tf.name_scope('conv_1'):
            W_conv1 = tf.Variable(tf.contrib.layers.xavier_initializer()([5, 5, 1, 96]),name='W_conv1')
            b_conv1 = tf.Variable(tf.zeros([1, 1, 1, 96]),name='b_conv1')
            X_1 = tf.nn.conv2d(x_image, W_conv1, strides=(1, 2, 2, 1), padding='VALID') + b_conv1
            A_1 = tf.nn.relu(X_1)
            tf.summary.histogram('conv1_weights', W_conv1)
            tf.summary.histogram('conv1_biases', b_conv1)
            tf.summary.histogram('conv1_logits', X_1)
            tf.summary.histogram('conv1_activations', A_1)
        #maxpool2
        maxpool_1 = tf.nn.max_pool(A_1, ksize=pooling_size, strides=(1, 2, 2, 1), padding='VALID', name='maxpool_1')

        # layer 3-5 - fire modules
        fire_2 = fire_module(maxpool_1, 16, 64, 64, layer_num=2)
        fire_3 = fire_module(fire_2, 16, 64, 64, layer_num=3)
        fire_4 = fire_module(fire_3, 32, 128, 128, layer_num=4)

        # layer 6 - maxpool
        maxpool_4 = tf.nn.max_pool(fire_4, ksize=pooling_size, strides=(1, 2, 2, 1), padding='VALID', name='maxpool_4')

        # layer 7-10 - fire modules
        fire_5 = fire_module(maxpool_4, 32, 128, 128, layer_num=5)
        fire_6 = fire_module(fire_5, 48, 192, 192, layer_num=6)
        fire_7 = fire_module(fire_6, 48, 192, 192, layer_num=7)
        fire_8 = fire_module(fire_7, 64, 256, 256, layer_num=8)

        # layer 11 - maxpool
        maxpool_8 = tf.nn.max_pool(fire_8, ksize=pooling_size, strides=(1, 2, 2, 1), padding='VALID', name='maxpool_8')

        # layer 12 - fire 9 + dropout
        fire_9 = fire_module(maxpool_8, 64, 256, 256, layer_num=9)

        dropout_9 = tf.cond(in_training,
                            lambda: tf.nn.dropout(fire_9, keep_prob=0.5),
                            lambda: fire_9)

        input_shape = dropout_9.get_shape().as_list()[3]
        # layer 13 - final
        with tf.name_scope('final'):
            W_conv10 = tf.Variable(tf.contrib.layers.xavier_initializer()([1, 1, input_shape, 10]),name="W_conv10")
            b_conv10 = tf.Variable(tf.zeros([1, 1, 1, 10]),name="b_conv10")
            conv_10 = tf.nn.conv2d(dropout_9, W_conv10, strides=(1, 1, 1,1), padding='VALID') + b_conv10
            A_conv_10 = tf.nn.relu(conv_10)

            tf.summary.histogram('conv10_weights', W_conv10)
            tf.summary.histogram('conv10_biases', b_conv10)
            tf.summary.histogram('conv10_logits', conv_10)
            tf.summary.histogram('conv10_activations', A_conv_10)
        
        # avg pooling to get [1 x 1 x num_classes] must average over entire window oh H x W from input layer
        _, H_last, W_last, _ = A_conv_10.get_shape().as_list()
        pooled = tf.nn.avg_pool(A_conv_10, ksize=(1, H_last, W_last, 1), strides=(1, 1, 1, 1), padding='VALID')
        logits = tf.squeeze(pooled, axis=[1, 2])

        # loss + optimizer
        #one_hot_labels = y   
        one_hot_labels = tf.one_hot(y, 10, name='one_hot_encoding')
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=one_hot_labels, logits=logits))
        tf.summary.scalar('loss', loss)
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        # accuracy
        predictions = tf.reshape(tf.argmax(tf.nn.softmax(logits), axis=1, output_type=tf.int32), [-1, 1])
        accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, y), dtype=tf.float32))
        tf.summary.scalar('train_accuracy', accuracy)

        summaries = tf.summary.merge_all()
        test_accuracy_summary = tf.summary.scalar('test_accuracy', accuracy)


        graph_init = tf.global_variables_initializer()

In [199]:
variables_to_save = {"W_conv1":W_conv1,
                     "b_conv1":b_conv1,
                     "W_conv10":W_conv10,
                     "b_conv10":b_conv10}


In [200]:
values = {}

In [None]:
import cPickle as pickle

n_epochs = 5
minibatch_size = 128
iterations = 10000 #this get u to 92%, anything more is overfitting
import os
with tf.Session(graph=without_ABC_graph) as sess:
    sess.run(graph_init)
    experiment_dir = next_experiment_dir('/tmp/squeezenet')
    print("Creating output dir:", experiment_dir)
    train_writer = tf.summary.FileWriter(experiment_dir, sess.graph)

    for i in range(iterations):
            # pick random minibatch
        mb_start = np.random.randint(0,num_images_train  - minibatch_size)
        mb_end = mb_start + minibatch_size
        mb_data = x_train[mb_start:mb_end, :, :, :]
        mb_labels = y_train[mb_start:mb_end, :]

        feed_dict = {
            x: mb_data,
            y: mb_labels,
            keep_prob: 0.5,
            in_training: True,
            learning_rate: 0.001  #0.0004
        }
       # print(i)
        collectibles = [loss, accuracy, summaries, optimizer]

        loss_val, accuracy_val, s, _ = sess.run(collectibles, feed_dict=feed_dict)

        train_writer.add_summary(s, i)

        if i % 100 == 0:
            feed_dict = {
                x: x_test,
                y: y_test,
                keep_prob: 1.0,
                in_training: False,
                learning_rate: 0.001
            }
            test_acc, sum_acc = sess.run([accuracy, test_accuracy_summary], feed_dict=feed_dict)
            train_writer.add_summary(sum_acc, i)
            print('Iteration: {}\t loss: {:.3f}\t accuracy: {:.3f}\t test accuracy: {:.3f}'.format(
                i, loss_val, accuracy_val, test_acc))
    print("Saving")
    for var_name in variables_to_save:
        values[var_name] = sess.run(variables_to_save[var_name])
    pickle.dump(values, open("Smallsave.p","wb"))

        
        
    # On completion of training, save the variables to be fed to custom model
   # for var_name in variables_to_save:   #     values[var_name] = sess.run(variables_to_save[var_name])

Creating output dir: /tmp/squeezenet/11
Iteration: 0	 loss: 2.303	 accuracy: 0.117	 test accuracy: 0.101
Iteration: 100	 loss: 2.064	 accuracy: 0.250	 test accuracy: 0.281
Iteration: 200	 loss: 1.727	 accuracy: 0.297	 test accuracy: 0.336
Iteration: 300	 loss: 1.437	 accuracy: 0.492	 test accuracy: 0.485
Iteration: 400	 loss: 1.220	 accuracy: 0.500	 test accuracy: 0.590
Iteration: 500	 loss: 0.824	 accuracy: 0.711	 test accuracy: 0.729
Iteration: 600	 loss: 0.611	 accuracy: 0.773	 test accuracy: 0.760
Iteration: 700	 loss: 0.951	 accuracy: 0.711	 test accuracy: 0.769
Iteration: 800	 loss: 0.445	 accuracy: 0.852	 test accuracy: 0.802
Iteration: 900	 loss: 0.536	 accuracy: 0.805	 test accuracy: 0.808
Iteration: 1000	 loss: 0.641	 accuracy: 0.789	 test accuracy: 0.830
Iteration: 1100	 loss: 0.416	 accuracy: 0.891	 test accuracy: 0.840
Iteration: 1200	 loss: 0.419	 accuracy: 0.898	 test accuracy: 0.842
Iteration: 1300	 loss: 0.531	 accuracy: 0.820	 test accuracy: 0.859
Iteration: 1400	 los

In [18]:
#we are using the model w 10,000 samples and 2000 valid so 92% is pretty good. Stop at 5000 

The 100% accuracy is not an error. It is due to the fact that complete validation set is not being evaluated only part of it is being evaluated and our model got all right answers in that part

#### Creating the custom model
While creating the custom model, we will need to create all the variables ourself.

First let's create a function that returns the required mean and variance for the batchnorm layer. Batchnorm layer requires that mean and variance be calculated of every layer except that of the channels layer

In [154]:
import cPickle as pickle
values = pickle.load(open("save_slow.p","rb"))

def bn_mean_variance(input_tensor, axis=-1, keep_dims=True):
    shape = len(input_tensor.get_shape())
    if axis < 0:
        axis += shape
    dimension_range = range(shape)
    return tf.nn.moments(input_tensor, axes=dimension_range[:axis] + dimension_range[axis+1:],
                         keep_dims=keep_dims)

In [157]:
custom_graph = tf.Graph()
with custom_graph.as_default():
    alphas_training_operations = []
    alphas_variables = []
    
    # Setting configuration
    no_filters_conv1 = 2
    no_layers_conv1 = 1
    no_filters_conv10 = 2
    no_layers_conv10 = 1
    
    pooling_size=(1, 2, 2, 1)

    # Inputs
    #x = tf.placeholder(dtype=tf.float32)
    #x_image = tf.reshape(x, [-1, 28, 28, 1])
    x = tf.placeholder(tf.float32,
                                     shape=[None, 28, 28, 1],  #use 32, 32, 3 for cifar
                                     name='x')
    #x_image = tf.reshape(x, [-1, 28, 28, 1])
    x_image = x
    keep_prob = tf.placeholder(tf.float32)
    y = tf.placeholder(tf.int32, [None,1],name='y')
    in_training = tf.placeholder(tf.bool, shape=())
    learning_rate = tf.placeholder(tf.float32, shape=())
    # Convolution Layer 1
    W_conv1 = tf.Variable(values["W_conv1"], name="W_conv1")
    b_conv1 = tf.Variable(values["b_conv1"], name="b_conv1")
    #W_conv1 = tf.Variable(tf.contrib.layers.xavier_initializer()([5, 5, 1, 96]),name='W_conv1')
    #b_conv1 = tf.Variable(tf.zeros([1, 1, 1, 96]),name='b_conv1')
    #X_1 = tf.nn.conv2d(x_image, W_conv1, strides=(1, 2, 2, 1), padding='VALID') + b_conv1
    
    # Creating new variables
    alphas_conv1 = tf.Variable(tf.constant(1., shape=(no_filters_conv1, 1)),
                               dtype=tf.float32, name="alphas_conv1")
    shift_parameters_conv1 = tf.Variable(tf.constant(0., shape=(no_layers_conv1, 1)),
                                         dtype=tf.float32, name="shift_parameters_conv1")
    betas_conv1 = tf.Variable(tf.constant(1., shape=(no_layers_conv1, 1)),
                              dtype=tf.float32, name="betas_conv1")
    # Performing the operations
    binary_filters_conv1 = get_binary_filters(W_conv1, no_filters_conv1)
    alpha_training_conv1, alpha_loss_conv1 = alpha_training(tf.stop_gradient(W_conv1, "no_gradient_W_conv1"),
                                                            tf.stop_gradient(binary_filters_conv1,
                                                                             "no_gradient_binary_filters_conv1"),
                                                            alphas_conv1, no_filters_conv1)
    conv1 = ABC(binary_filters_conv1, tf.stop_gradient(alphas_conv1), shift_parameters_conv1,
                betas_conv1, b_conv1, padding="SAME")(x_image)
    # Saving the alphas training operation and the variable
    alphas_training_operations.append(alpha_training_conv1)
    alphas_variables.append(alphas_conv1)
    
    
    # Other layers
   # A_1 = tf.nn.relu(conv1)
        #maxpool
   # maxpool_1 = tf.nn.max_pool(A_1, ksize=pooling_size, strides=(1, 2, 2, 1), padding='VALID', name='maxpool_1')
    
    maxpool_1 = tf.nn.max_pool(conv1, ksize=pooling_size, strides=(1, 2, 2, 1), padding='VALID', name='maxpool_1')
    ##batch Norm
    mean_conv1, variance_conv1 = bn_mean_variance(maxpool_1)
    bn_gamma_conv1 = tf.Variable(tf.ones(shape=(96,), dtype=tf.float32), name="bn_gamma_conv1")
    bn_beta_conv1 = tf.Variable(tf.zeros(shape=(96,), dtype=tf.float32), name="bn_beta_conv1")
    bn_conv1 = tf.nn.batch_normalization(maxpool_1, mean_conv1, variance_conv1,
                                         bn_beta_conv1, bn_gamma_conv1, 0.001)
    
    A_1 = tf.nn.relu(bn_conv1)
    
        # layer 3-5 - fire modules
   # fire_2 = fire_module(maxpool_1, 16, 64, 64, layer_num=2)
    fire_2 = fire_module(A_1, 16, 64, 64, layer_num=2)

    fire_3 = fire_module(fire_2, 16, 64, 64, layer_num=3)
    fire_4 = fire_module(fire_3, 32, 128, 128, layer_num=4)

        # layer 6 - maxpool
    maxpool_4 = tf.nn.max_pool(fire_4, ksize=pooling_size, strides=(1, 2, 2, 1), padding='VALID', name='maxpool_4')

        # layer 7-10 - fire modules
    fire_5 = fire_module(maxpool_4, 32, 128, 128, layer_num=5)
    fire_6 = fire_module(fire_5, 48, 192, 192, layer_num=6)         
    fire_7 = fire_module(fire_6, 48, 192, 192, layer_num=7)
    fire_8 = fire_module(fire_7, 64, 256, 256, layer_num=8)

        # layer 11 - maxpool
    maxpool_8 = tf.nn.max_pool(fire_8, ksize=pooling_size, strides=(1, 2, 2, 1), padding='VALID', name='maxpool_8')

        # layer 12 - fire 9 + dropout
    fire_9 = fire_module(maxpool_8, 64, 256, 256, layer_num=9)

    dropout_9 = tf.cond(in_training,
                        lambda: tf.nn.dropout(fire_9, keep_prob=0.5),
                        lambda: fire_9)

    input_shape = dropout_9.get_shape().as_list()[3]
    # BatchNorm 

    # Convolution Layer 10
    W_conv10 = tf.Variable(values["W_conv10"], name="W_conv10")
    b_conv10 = tf.Variable(values["b_conv10"], name="b_conv10")
    
    # Creating new variables
    alphas_conv10 = tf.Variable(tf.constant(1., shape=(no_filters_conv10, 1)),
                               dtype=tf.float32, name="alphas_conv10")
    shift_parameters_conv10 = tf.Variable(tf.constant(0., shape=(no_layers_conv10, 1)),
                                         dtype=tf.float32, name="shift_parameters_conv10")
    betas_conv10 = tf.Variable(tf.constant(1., shape=(no_layers_conv10, 1)),
                              dtype=tf.float32, name="betas_conv10")
    
    # Performing the operations
    binary_filters_conv10 = get_binary_filters(W_conv10, no_filters_conv10)
    alpha_training_conv10, alpha_loss_conv10 = alpha_training(tf.stop_gradient(W_conv10, "no_gradient_W_conv10"),
                                                            tf.stop_gradient(binary_filters_conv10,
                                                                             "no_gradient_binary_filters_conv10"),
                                                            alphas_conv10, no_filters_conv10)
    conv10 = ABC(binary_filters_conv10, tf.stop_gradient(alphas_conv10), shift_parameters_conv10,
                betas_conv10, b_conv10, padding="SAME")(dropout_9) #connect to dropout 9
    
    # Saving the alphas training operation and the variable
    alphas_training_operations.append(alpha_training_conv10)
    alphas_variables.append(alphas_conv10)
    
    # Other layers
 #   A_conv_10 = tf.nn.relu(conv10)
    # BatchNorm
   # _, H_last, W_last, _ = A_conv_10.get_shape().as_list()
    _, H_last, W_last, _ = conv10.get_shape().as_list()

    #pooled = tf.nn.avg_pool(A_conv_10, ksize=(1, H_last, W_last, 1), strides=(1, 1, 1, 1), padding='VALID')
    pooled = tf.nn.avg_pool(conv10, ksize=(1, H_last, W_last, 1), strides=(1, 1, 1, 1), padding='VALID')
    #batchnorm
    mean_conv10, variance_conv10 = bn_mean_variance(pooled)
    bn_gamma_conv10 = tf.Variable(tf.ones(shape=(10,), dtype=tf.float32), name="bn_gamma_conv10")
    bn_beta_conv10 = tf.Variable(tf.zeros(shape=(10,), dtype=tf.float32), name="bn_beta_conv10")
    bn_conv10 = tf.nn.batch_normalization(pooled, mean_conv10, variance_conv10,
                                         bn_beta_conv10, bn_gamma_conv10, 0.001)
    A_conv_10 = tf.nn.relu(bn_conv10)
    logits = tf.squeeze(A_conv_10, axis=[1, 2])

  #  logits = tf.squeeze(pooled, axis=[1, 2])

    one_hot_labels = tf.one_hot(y, 10, name='one_hot_encoding')
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=one_hot_labels, logits=logits))
    tf.summary.scalar('loss', loss)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

        # accuracy
    predictions = tf.reshape(tf.argmax(tf.nn.softmax(logits), axis=1, output_type=tf.int32), [-1, 1])
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, y), dtype=tf.float32))
    tf.summary.scalar('train_accuracy', accuracy)

    summaries = tf.summary.merge_all()
    test_accuracy_summary = tf.summary.scalar('test_accuracy', accuracy)

    #init
    graph_init = tf.global_variables_initializer()
    alphas_init = tf.variables_initializer(alphas_variables)

Let's create the dictionary of variables to save

In [158]:
# Defining variables to save. These will be fed to our custom layer
variables_to_save = {"W_conv1": W_conv1,
                     "b_conv1": b_conv1,
                     "alphas_conv1": alphas_conv1,
                     "betas_conv1": betas_conv1,
                     "shift_parameters_conv1": shift_parameters_conv1,
                     "W_conv10": W_conv10,
                     "b_conv10": b_conv10,
                     "alphas_conv10": alphas_conv10,
                     "betas_conv10": betas_conv10,
                     "shift_parameters_conv10": shift_parameters_conv10,
                    }
values = {}

In [159]:
%%time
n_epochs = 5
minibatch_size = 128
alpha_training_epochs = 100
iterations = 2000 #this get u to 92%, anything more is overfitting
import os
print("hello?")
with tf.Session(graph=custom_graph) as sess:
    sess.run(graph_init)
  #  experiment_dir = next_experiment_dir('/tmp/squeezenet')
  #  print("Creating output dir:", experiment_dir)
  #  train_writer = tf.summary.FileWriter(experiment_dir, sess.graph)

    for i in range(iterations):
        # Training alphas
        sess.run(alphas_init)
        for alpha_training_op in alphas_training_operations:
            for alpha_epoch in range(alpha_training_epochs):
                sess.run(alpha_training_op)
        # pick random minibatch
        mb_start = np.random.randint(0,num_images_train  - minibatch_size)
        mb_end = mb_start + minibatch_size
        mb_data = x_train[mb_start:mb_end, :, :, :]
        mb_labels = y_train[mb_start:mb_end, :]

        feed_dict = {
            x: mb_data,
            y: mb_labels,
            keep_prob: 0.5,
            in_training: True,
            learning_rate: 0.0004
        }

        collectibles = [loss, accuracy, summaries, optimizer]

        loss_val, accuracy_val, s, _ = sess.run(collectibles, feed_dict=feed_dict)

     #   train_writer.add_summary(s, i)

        if i % 100 == 0: #after 100 iters
            # Training alphas
            sess.run(alphas_init)
            for alpha_training_op in alphas_training_operations:
                for alpha_epoch in range(alpha_training_epochs):
                    sess.run(alpha_training_op)
            feed_dict = {
                x: x_test,
                y: y_test,
                keep_prob: 1.0,
                in_training: False,
                learning_rate: 0.0004 
            }
            test_acc, sum_acc = sess.run([accuracy, test_accuracy_summary], feed_dict=feed_dict)
      #      train_writer.add_summary(sum_acc, i)
            print('Iteration: {}\t loss: {:.3f}\t accuracy: {:.3f}\t test accuracy: {:.3f}'.format(
                i, loss_val, accuracy_val, test_acc))
    print("Saving")
    for var_name in variables_to_save:
        values[var_name] = sess.run(variables_to_save[var_name])
    pickle.dump(values, open("save2.p","wb"))


#n_epochs = 5
#batch_size = 32
#alpha_training_epochs = 200
        
#with tf.Session(graph=custom_graph) as sess:
#    sess.run(graph_init)
#    for epoch in range(n_epochs):
#        for iteration in range(1, 200 + 1):
            # Training alphas
#            sess.run(alphas_init)
#            for alpha_training_op in alphas_training_operations:
#                for alpha_epoch in range(alpha_training_epochs):
#                    sess.run(alpha_training_op)
            
#            batch = mnist.train.next_batch(50)
            
            # Run operation and calculate loss
#            _, loss_train = sess.run([train_step, cross_entropy],
#                                     feed_dict={x: batch[0], y: batch[1], keep_prob: 0.5})
#            print("\rIteration: {}/{} ({:.1f}%)  Loss: {:.5f}".format(
#                      iteration, 200,
#                      iteration * 100 / 200,
#                      loss_train),
#                  end="")

        # At the end of each epoch,
        # measure the validation loss and accuracy:
        
        # Training alphas
#        sess.run(alphas_init)
#        for alpha_training_op in alphas_training_operations:
#            for alpha_epoch in range(alpha_training_epochs):
#                sess.run(alpha_training_op)
                    
#        loss_vals = []
#        acc_vals = []
#        for iteration in range(1, 200 + 1):            
#            X_batch, y_batch = mnist.validation.next_batch(batch_size)
#            acc_val, loss_val = sess.run([accuracy, cross_entropy],
#                                     feed_dict={x: batch[0], y: batch[1], keep_prob: 1.0})
#            loss_vals.append(loss_val)
#            acc_vals.append(acc_val)
#            print("\rEvaluating the model: {}/{} ({:.1f}%)".format(iteration, 200,
#                iteration * 100 / 200),
#                  end=" " * 10)
#        loss_val = np.mean(loss_vals)
#        acc_val = np.mean(acc_vals)
#        print("\rEpoch: {}  Val accuracy: {:.4f}%  Loss: {:.6f}".format(
#            epoch + 1, acc_val * 100, loss_val))
        
    # On completion of training, save the variables to be fed to custom model
#    for var_name in variables_to_save:
#        values[var_name] = sess.run(variables_to_save[var_name])

hello?
Iteration: 0	 loss: 2.303	 accuracy: 0.117	 test accuracy: 0.100
Iteration: 100	 loss: 2.301	 accuracy: 0.148	 test accuracy: 0.095
Iteration: 200	 loss: 2.302	 accuracy: 0.125	 test accuracy: 0.095
Iteration: 300	 loss: 2.300	 accuracy: 0.141	 test accuracy: 0.095
Iteration: 400	 loss: 2.299	 accuracy: 0.148	 test accuracy: 0.095
Iteration: 500	 loss: 2.304	 accuracy: 0.086	 test accuracy: 0.095
Iteration: 600	 loss: 2.303	 accuracy: 0.094	 test accuracy: 0.095


KeyboardInterrupt: 

Now, only the required variables can be saved for inference time. Using the **W_conv1** and **W_conv2**, values for binary filters and alphas can be calculated and those can be used along with **shift_parameters** and **betas** to create ABC layer for inference

### Pure inference testing
OK! Let's extract the binary filters and alphas and throw away the weights and test our network. This will ensure that we do not have any bug in the implementation of the ABC layer

Creating graphs for alphas calculation

In [None]:
alpha1_cal_graph = tf.Graph()
with alpha1_cal_graph.as_default():
    alphas1 = tf.Variable(tf.zeros(shape=(no_filters_conv1, 1), dtype=tf.float32))
    conv_filters1 = tf.placeholder(dtype=tf.float32, shape=(5, 5, 1, 32))
    bin_filters1 = get_binary_filters(convolution_filters=conv_filters1,
                                     no_filters=no_filters_conv1)
    alpha_training_op1, alpha_training_loss1 = alpha_training(conv_filters1, bin_filters1,
                                                            alphas1, no_filters_conv1)
    al_init1 = tf.global_variables_initializer()
    
alpha2_cal_graph = tf.Graph()
with alpha2_cal_graph.as_default():
    alphas2 = tf.Variable(tf.zeros(shape=(no_filters_conv2, 1), dtype=tf.float32))
    conv_filters2 = tf.placeholder(dtype=tf.float32, shape=(5, 5, 32, 64))
    bin_filters2 = get_binary_filters(convolution_filters=conv_filters2,
                                     no_filters=no_filters_conv2)
    alpha_training_op2, alpha_training_loss2 = alpha_training(conv_filters2, bin_filters2,
                                                            alphas2, no_filters_conv2)
    al_init2 = tf.global_variables_initializer()

Calculating alphas and binary filters

In [None]:
with tf.Session(graph=alpha1_cal_graph) as sess:
    al_init1.run()
    for epoch in range(200):
        sess.run(alpha_training_op1, feed_dict={conv_filters1: values["W_conv1"]})
    cal_bin_filters, cal_alphas = sess.run([bin_filters1, alphas1], feed_dict={conv_filters1: values["W_conv1"]})
    values["binary_filters_conv1"] = cal_bin_filters
    values["alphas_conv1"] = cal_alphas

with tf.Session(graph=alpha2_cal_graph) as sess:
    al_init2.run()
    for epoch in range(200):
        sess.run(alpha_training_op2, feed_dict={conv_filters2: values["W_conv2"]})
    cal_bin_filters, cal_alphas = sess.run([bin_filters2, alphas2], feed_dict={conv_filters2: values["W_conv2"]})
    values["binary_filters_conv2"] = cal_bin_filters
    values["alphas_conv2"] = cal_alphas

#### Building inference model
Now, we have all our variables, let's build an inference model

In [None]:
inference_graph = tf.Graph()
with inference_graph.as_default():
    # Setting configuration
    no_filters_conv1 = 5
    no_layers_conv1 = 5
    no_filters_conv2 = 5
    no_layers_conv2 = 5
    
    # Inputs
    x = tf.placeholder(dtype=tf.float32)
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    
    # Convolution Layer 1
    b_conv1 = tf.convert_to_tensor(values["b_conv1"], dtype=tf.float32, name="b_conv1")
    alphas_conv1 = tf.convert_to_tensor(values["alphas_conv1"],
                                        dtype=tf.float32, name="alphas_conv1")
    shift_parameters_conv1 = tf.convert_to_tensor(values["shift_parameters_conv1"],
                                                  dtype=tf.float32, name="shift_parameters_conv1")
    betas_conv1 = tf.convert_to_tensor(values["betas_conv1"],
                                       dtype=tf.float32, name="betas_conv1")
    # Performing the operations
    binary_filters_conv1 = tf.convert_to_tensor(values["binary_filters_conv1"], dtype=tf.float32,
                                                name="binary_filters_conv1")
    conv1 = ABC(binary_filters_conv1, tf.stop_gradient(alphas_conv1), shift_parameters_conv1,
                betas_conv1, b_conv1, padding="SAME")(x_image)
    # Other layers
    pool1 = max_pool_2x2(conv1)
    # batch norm parameters
    mean_conv1, variance_conv1 = bn_mean_variance(pool1)
    bn_gamma_conv1 = tf.convert_to_tensor(values["bn_gamma_conv1"], dtype=tf.float32,
                                          name="bn_gamma_conv1")
    bn_beta_conv1 = tf.convert_to_tensor(values["bn_beta_conv1"], dtype=tf.float32,
                                         name="bn_beta_conv1")
    bn_conv1 = tf.nn.batch_normalization(pool1, mean_conv1, variance_conv1,
                                         bn_beta_conv1, bn_gamma_conv1, 0.001)
    h_conv1 = tf.nn.relu(bn_conv1)

    # Convolution Layer 2
    b_conv2 = tf.convert_to_tensor(values["b_conv2"], dtype=tf.float32, name="b_conv2")
    alphas_conv2 = tf.convert_to_tensor(values["alphas_conv2"],
                                        dtype=tf.float32, name="alphas_conv2")
    shift_parameters_conv2 = tf.convert_to_tensor(values["shift_parameters_conv2"],
                                                  dtype=tf.float32, name="shift_parameters_conv2")
    betas_conv2 = tf.convert_to_tensor(values["betas_conv2"],
                                       dtype=tf.float32, name="betas_conv2")
    # Performing the operations
    binary_filters_conv2 = tf.convert_to_tensor(values["binary_filters_conv2"], dtype=tf.float32,
                                                name="binary_filters_conv2")
    conv2 = ABC(binary_filters_conv2, tf.stop_gradient(alphas_conv2), shift_parameters_conv2,
                betas_conv2, b_conv2, padding="SAME")(h_conv1)
    # Other layers
    pool2 = max_pool_2x2(conv2)
    # batch norm parameters
    mean_conv2, variance_conv2 = bn_mean_variance(pool2)
    bn_gamma_conv2 = tf.convert_to_tensor(values["bn_gamma_conv2"], dtype=tf.float32,
                                          name="bn_gamma_conv2")
    bn_beta_conv2 = tf.convert_to_tensor(values["bn_beta_conv2"], dtype=tf.float32,
                                         name="bn_beta_conv2")
    bn_conv2 = tf.nn.batch_normalization(pool2, mean_conv2, variance_conv2,
                                         bn_beta_conv2, bn_gamma_conv2, 0.001)
    h_conv2 = tf.nn.relu(bn_conv2)

    # Flat the conv2 output
    h_conv2_flat = tf.reshape(h_conv2, shape=(-1, 7*7*64))

    # Dense layer1
    W_fc1 = tf.convert_to_tensor(values["W_fc1"], dtype=tf.float32)
    b_fc1 = tf.convert_to_tensor(values["b_fc1"], dtype=tf.float32)
    h_fc1 = tf.nn.relu(tf.matmul(h_conv2_flat, W_fc1) + b_fc1)

    # Dropout
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Output layer
    W_fc2 = tf.convert_to_tensor(values["W_fc2"], dtype=tf.float32)
    b_fc2 = tf.convert_to_tensor(values["b_fc2"], dtype=tf.float32)
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    
    # Labels
    y = tf.placeholder(tf.int32, [None])
    y_ = tf.one_hot(y, 10)
    
    # Defining optimizer and loss
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

Let's test the inference model

In [None]:
%%time
with tf.Session(graph=inference_graph) as sess:
    loss_vals = []
    acc_vals = []
    for iteration in range(1, 500 + 1):            
        X_batch, y_batch = mnist.validation.next_batch(batch_size)
        acc_val, loss_val = sess.run([accuracy, cross_entropy],
                                 feed_dict={x: batch[0], y: batch[1], keep_prob: 1.0})
        loss_vals.append(loss_val)
        acc_vals.append(acc_val)
        print("\rEvaluating the model: {}/{} ({:.1f}%)".format(iteration, 500,
            iteration * 100 / 500),
              end=" " * 10)
    loss_val = np.mean(loss_vals)
    acc_val = np.mean(acc_vals)
    print("\rEpoch: {}  Val accuracy: {:.4f}%  Loss: {:.6f}".format(
        epoch + 1, acc_val * 100, loss_val))