## Example two-layer classifier models

The example code below creates instances of the CIFAR-10 and CIFAR-100 data provider objects and uses them to train simple two-layer feedforward network models with rectified linear activations in TensorFlow. You may wish to use this code as a starting point for your own experiments.

In [1]:
# Mini-batch sizes handed to the CIFAR-10 data providers. train_batch_size is
# also used as the fixed batch dimension of the `inputs` placeholder.
train_batch_size = 50
valid_batch_size = 50
# NOTE(review): not referenced anywhere in the visible part of this notebook;
# create_model takes the number of residual blocks as an argument instead.
res_blocks_num = 2
# Scale of the L2 regularizer that create_variables attaches to each variable.
weight_decay = 0.0002

In [2]:
import os
import tensorflow as tf
import numpy as np
from mlp.data_providers import CIFAR10DataProvider
import matplotlib.pyplot as plt
%matplotlib inline
import time
import datetime

In [3]:
def open_file_to_write(name):
    """Create fresh summary writers for the run called ``name``.

    Event files are written below
    ``log-bin-resnet/<name>/<timestamp>/train`` and ``.../valid``, where the
    timestamp is taken at call time. The two writers are published through
    the module-level globals ``train_writer`` and ``valid_writer``.

    :param name: A string. Directory name identifying the run.
    """
    global train_writer
    global valid_writer
    run_stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    # Both writers share the same per-run directory and differ only in the leaf.
    run_dir = os.path.join('log-bin-resnet/', name, run_stamp)
    train_writer = tf.summary.FileWriter(os.path.join(run_dir, 'train'))
    valid_writer = tf.summary.FileWriter(os.path.join(run_dir, 'valid'))

In [14]:
def conv_bin(x, filter, strides, padding):
    """Convolve ``x`` with a sign-binarized version of ``filter``.

    The filter is replaced by ``sign(filter / a)`` and the convolution output
    is multiplied by ``a``, where ``a`` is the sum of ``|filter|`` over axes
    0, 1 and 2 divided by the product of those three dimensions (one scale
    per entry of the last axis). ``a`` is wrapped in ``tf.stop_gradient`` and
    the gradient of the ``Sign`` op is overridden with ``Identity`` for the
    ops created here.

    :param x: input tensor, passed to tf.nn.conv2d unchanged
    :param filter: filter tensor; callers in this file pass
        [filter_height, filter_width, filter_depth, filter_number]
    :param strides: strides argument for tf.nn.conv2d
    :param padding: padding argument for tf.nn.conv2d
    :return: the scaled convolution output
    """
    graph = tf.get_default_graph()
    with graph.gradient_override_map({"Sign": "Identity"}):
        filter_shape = tf.shape(filter)
        # Product of every filter dimension except the last one.
        n = tf.cast(tf.reduce_prod(filter_shape[0:-1]), tf.float32)
        magnitude = tf.abs(filter)

        # Vector of scales: one value per entry of the last filter axis.
        a = tf.stop_gradient(tf.reduce_sum(magnitude, [0, 1, 2]) / n)
        # With this implementation the network learns.
        # Alternative (same alpha for all, a scalar):
        #   a = tf.stop_gradient(tf.reduce_sum(abs, [0,1,2,3])/n)

        binary_filter = tf.sign(filter / a)
        return tf.nn.conv2d(x, binary_filter, strides, padding) * a

### CIFAR-10

In [5]:
# Training-set provider; batches have train_batch_size examples.
train_data = CIFAR10DataProvider('train', batch_size=train_batch_size)
# Two-step reshape of the training inputs into (N, 32, 32, 3):
#   1. split every example into (pixels, 3) using Fortran (column-major)
#      order, then
#   2. fold the pixel axis into 32 x 32 using the default C order.
# The number of examples is read from the array instead of being hard-coded.
# NOTE(review): assumes each row of `inputs` is one flattened 32*32*3 image
# stored channel after channel -- confirm against the data provider.
re = train_data.inputs.reshape((train_data.inputs.shape[0], -1, 3), order='F')
train_data.inputs = re.reshape((re.shape[0], 32, 32, 3))



# The validation inputs are NOT reshaped here; run_session applies the same
# two-step reshape to every validation batch before feeding it.
valid_data = CIFAR10DataProvider('valid', batch_size=valid_batch_size)

# RESNET FUNCTIONS

In [18]:
def fully_connected_layer(inputs, input_dim, output_dim, nonlinearity=tf.nn.relu):
    '''
    Affine layer followed by a nonlinearity: nonlinearity(inputs . W + b)
    :param inputs: 2D tensor with input_dim columns
    :param input_dim: int. Number of input features
    :param output_dim: int. Number of output units
    :param nonlinearity: callable applied to the affine output (default tf.nn.relu)
    :return: 2D tensor with output_dim columns
    '''
    # The name must be passed by keyword: the second positional parameter of
    # tf.Variable is `trainable`, so a positional 'weights' / 'biases' string
    # was silently taken as a truthy `trainable` flag and the variables were
    # left with auto-generated names.
    weights = tf.Variable(
        tf.truncated_normal(
            [input_dim, output_dim], stddev=2. / (input_dim + output_dim)**0.5),
        name='weights')
    biases = tf.Variable(tf.zeros([output_dim]), name='biases')
    outputs = nonlinearity(tf.matmul(inputs, weights) + biases)
    return outputs

def create_variables(name, shape, initializer=tf.contrib.layers.xavier_initializer(), is_fc_layer=False):
    '''
    Get a variable through tf.get_variable with an L2 regularizer attached.
    :param name: A string. The name of the new variable
    :param shape: A list of dimensions
    :param initializer: Initializer for the variable. Xavier by default.
    :param is_fc_layer: Flag for fully connected layers. It currently has no effect: the
    same weight_decay scale is used whether it is set or not.
    :return: The created variable
    '''
    # Fc and non-fc variables share one L2 scale, the module-level weight_decay.
    l2_penalty = tf.contrib.layers.l2_regularizer(scale=weight_decay)
    return tf.get_variable(name, shape=shape, initializer=initializer,
                           regularizer=l2_penalty)


def batch_normalization_layer(input_layer, dimension):
    '''
    Batch-normalize a tensor with a learned offset (beta) and scale (gamma).
    Mean and variance are always computed from the tensor passed in, over axes 0, 1 and 2;
    no running averages are kept. Epsilon is the module-level BN_EPSILON.
    :param input_layer: 4D tensor
    :param dimension: input_layer.get_shape().as_list()[-1]. The depth of the 4D tensor
    :return: the 4D tensor after being normalized
    '''
    batch_mean, batch_variance = tf.nn.moments(input_layer, axes=[0, 1, 2])

    # beta starts at 0 and gamma at 1, so the initial transform is a plain normalization.
    zeros_init = tf.constant_initializer(0.0, tf.float32)
    beta = tf.get_variable('beta', dimension, tf.float32, initializer=zeros_init)
    ones_init = tf.constant_initializer(1.0, tf.float32)
    gamma = tf.get_variable('gamma', dimension, tf.float32, initializer=ones_init)

    return tf.nn.batch_normalization(input_layer, batch_mean, batch_variance,
                                     offset=beta, scale=gamma,
                                     variance_epsilon=BN_EPSILON)


def conv_bn_relu_layer(input_layer, filter_shape, stride):
    '''
    Apply binarized convolution, batch normalization and relu, in that order
    :param input_layer: 4D tensor
    :param filter_shape: list. [filter_height, filter_width, filter_depth, filter_number]
    :param stride: stride size for conv
    :return: 4D tensor. Y = Relu(batch_normalize(conv(X)))
    '''
    kernel = create_variables(name='conv', shape=filter_shape)
    conv_out = conv_bin(input_layer, kernel, strides=[1, stride, stride, 1], padding='SAME')

    # The last entry of filter_shape is the number of output channels.
    normalized = batch_normalization_layer(conv_out, filter_shape[-1])
    return tf.nn.relu(normalized)


def bn_relu_conv_layer(input_layer, filter_shape, stride):
    '''
    Apply batch normalization, relu and binarized convolution, in that order
    :param input_layer: 4D tensor
    :param filter_shape: list. [filter_height, filter_width, filter_depth, filter_number]
    :param stride: stride size for conv
    :return: 4D tensor. Y = conv(Relu(batch_normalize(X)))
    '''
    # Depth of the incoming tensor, read from its static shape.
    depth = input_layer.get_shape().as_list()[-1]
    activated = tf.nn.relu(batch_normalization_layer(input_layer, depth))

    kernel = create_variables(name='conv', shape=filter_shape)
    return conv_bin(activated, kernel, strides=[1, stride, stride, 1], padding='SAME')



def residual_block(input_layer, output_channel, first_block=False):
    '''
    Build one residual block: two binarized 3x3 convolutions plus a shortcut connection
    :param input_layer: 4D tensor
    :param output_channel: int. Must equal the input depth or exactly twice the input depth
    :param first_block: if this is the first residual block of the whole network
    :return: 4D tensor with output_channel channels
    :raises ValueError: if output_channel is neither the input depth nor twice the input depth
    '''
    input_channel = input_layer.get_shape().as_list()[-1]

    if output_channel not in (input_channel, input_channel * 2):
        raise ValueError('Output and input channel does not match in residual blocks!!!')

    # Doubling the depth goes together with halving the spatial size (stride 2).
    increase_dim = output_channel == input_channel * 2
    stride = 2 if increase_dim else 1

    with tf.variable_scope('conv1_in_block'):
        if not first_block:
            conv1 = bn_relu_conv_layer(input_layer, [3, 3, input_channel, output_channel], stride)
        else:
            # The very first block skips the leading batch-norm + relu.
            kernel = create_variables(name='conv', shape=[3, 3, input_channel, output_channel])
            conv1 = conv_bin(input_layer, filter=kernel, strides=[1, 1, 1, 1], padding='SAME')

    with tf.variable_scope('conv2_in_block'):
        conv2 = bn_relu_conv_layer(conv1, [3, 3, output_channel, output_channel], 1)

    if not increase_dim:
        shortcut = input_layer
    else:
        # Match conv2's shape: 2x2 average pooling halves height and width, and
        # zero channels are added on both sides of the depth axis.
        depth_pad = input_channel // 2
        pooled = tf.nn.avg_pool(input_layer, ksize=[1, 2, 2, 1],
                                strides=[1, 2, 2, 1], padding='VALID')
        shortcut = tf.pad(pooled, [[0, 0], [0, 0], [0, 0], [depth_pad, depth_pad]])

    return conv2 + shortcut



In [20]:
# Input image geometry; used for the `inputs` placeholder in create_model and
# for reshaping validation batches in run_session.
IMG_WIDTH = 32
IMG_HEIGHT = 32
IMG_DEPTH = 3
# Number of CIFAR-10 target classes.
NUM_CLASS = 10

# Variance epsilon passed to tf.nn.batch_normalization in batch_normalization_layer.
BN_EPSILON=0.005

# Passed as `reuse=` to every tf.variable_scope opened in create_model.
reuse = False

In [8]:
# Clear the default graph before any model is built.
tf.reset_default_graph()

In [9]:
def create_model(n):
    """Build the binary-weight ResNet and its training ops in the default graph.

    Architecture: one conv-bn-relu layer, then three stages of ``n`` residual
    blocks with 16, 32 and 64 channels, then batch-norm, relu, global average
    pooling and a linear output layer (6n + 2 weight layers in total).

    Nothing is returned. The graph handles are published through the
    module-level globals ``inputs``, ``targets``, ``outputs``, ``error``,
    ``accuracy``, ``train_step``, ``summary_op`` and ``init``, which
    run_session reads.

    :param n: int. Number of residual blocks per stage.
    """
    global init
    global summary_op
    global error
    global accuracy
    global outputs
    global train_step
    global inputs
    global targets

    # The batch dimension of `inputs` is fixed, so every fed batch (training
    # and validation alike) must contain exactly train_batch_size examples.
    inputs = tf.placeholder(
        tf.float32, [train_batch_size, IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH], 'inputs')
    targets = tf.placeholder(tf.float32, [None, train_data.num_classes], 'targets')

    layers = []
    with tf.variable_scope('conv0', reuse=reuse):
        conv0 = conv_bn_relu_layer(inputs, [3, 3, 3, 16], 1)
        layers.append(conv0)

    for i in range(n):
        with tf.variable_scope('conv1_%d' % i, reuse=reuse):
            # Only the very first residual block skips its leading bn + relu.
            conv11 = residual_block(layers[-1], 16, first_block=(i == 0))
            layers.append(conv11)

    for i in range(n):
        with tf.variable_scope('conv2_%d' % i, reuse=reuse):
            conv2 = residual_block(layers[-1], 32)
            layers.append(conv2)

    for i in range(n):
        with tf.variable_scope('conv3_%d' % i, reuse=reuse):
            conv3 = residual_block(layers[-1], 64)
            layers.append(conv3)
        assert conv3.get_shape().as_list()[1:] == [8, 8, 64]

    with tf.variable_scope('fc', reuse=reuse):
        in_channel = layers[-1].get_shape().as_list()[-1]
        bn_layer = batch_normalization_layer(layers[-1], in_channel)
        relu_layer = tf.nn.relu(bn_layer)
        # Global average pooling over the two spatial axes.
        global_pool = tf.reduce_mean(relu_layer, [1, 2])

        assert global_pool.get_shape().as_list()[-1:] == [64]
        # Size the logits from the same values the rest of the graph uses, so
        # they always line up with the pooled features and with `targets`.
        outputs = fully_connected_layer(
            global_pool, in_channel, train_data.num_classes, tf.identity)

    with tf.name_scope('error'):
        # Keyword arguments are required here: TensorFlow >= 1.0 rejects
        # positional arguments to this function.
        error = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=outputs, labels=targets))
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(
                tf.equal(tf.argmax(outputs, 1), tf.argmax(targets, 1)),
                tf.float32))

    with tf.name_scope('train'):
        # NOTE(review): create_variables attaches L2 regularizers to the conv
        # filters, but only `error` is minimized here, so weight_decay has no
        # influence on training. Adding the regularization losses would change
        # the reported results -- confirm intent before doing so.
        opt = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
        train_step = opt.minimize(error)

    init = tf.global_variables_initializer()

    tf.summary.scalar('error', error)
    tf.summary.scalar('accuracy', accuracy)
    summary_op = tf.summary.merge_all()


In [10]:
def run_session(name, num_epoch):
    """Train the model built by create_model and print per-epoch metrics.

    Opens fresh summary writers for the run, initializes all variables, and
    runs ``num_epoch`` passes over ``train_data``. After every epoch the mean
    training error/accuracy and the elapsed time are printed; after every
    fifth epoch the model is also evaluated on ``valid_data``. A summary is
    written for every training and validation batch. Both writers are closed
    when the run ends, including when it ends with an exception.

    Relies on the module-level globals set by create_model (``init``,
    ``train_step``, ``error``, ``accuracy``, ``summary_op``, ``inputs``,
    ``targets``) and by open_file_to_write (``train_writer``,
    ``valid_writer``).

    :param name: A string. Run name used for the log directory.
    :param num_epoch: int. Number of passes over the training set.
    """
    open_file_to_write(name)

    try:
        with tf.Session() as sess:
            sess.run(init)
            for e in range(num_epoch):
                running_error = 0.
                running_accuracy = 0.
                start = time.time()
                for b, (input_batch, target_batch) in enumerate(train_data):
                    _, batch_error, batch_acc, summary = sess.run(
                        [train_step, error, accuracy, summary_op],
                        feed_dict={inputs: input_batch, targets: target_batch})

                    running_error += batch_error
                    running_accuracy += batch_acc

                    # Global step = number of training batches seen so far.
                    train_writer.add_summary(summary, e * train_data.num_batches + b)

                running_error /= train_data.num_batches
                running_accuracy /= train_data.num_batches
                print('End of epoch {0:02d}: err(train)={1:.2f} acc(train)={2:.2f}, time(train)={3:.2f}'
                      .format(e + 1, running_error, running_accuracy, time.time() - start))
                if (e + 1) % 5 == 0:
                    valid_error = 0.
                    valid_accuracy = 0.
                    for b, (input_batch, target_batch) in enumerate(valid_data):
                        # Validation inputs are still flat: apply the same
                        # two-step reshape that was applied to the training
                        # inputs, sized from the batch itself.
                        batch_len = input_batch.shape[0]
                        input_batch = input_batch.reshape(
                            (batch_len, IMG_HEIGHT * IMG_WIDTH, IMG_DEPTH), order='F')
                        input_batch = input_batch.reshape(
                            (batch_len, IMG_HEIGHT, IMG_WIDTH, IMG_DEPTH))

                        batch_error, batch_acc, summary = sess.run(
                            [error, accuracy, summary_op],
                            feed_dict={inputs: input_batch, targets: target_batch})
                        valid_error += batch_error
                        valid_accuracy += batch_acc
                        valid_writer.add_summary(summary, e * train_data.num_batches + b)
                    valid_error /= valid_data.num_batches
                    valid_accuracy /= valid_data.num_batches
                    print('                 err(valid)={0:.2f} acc(valid)={1:.2f}'
                           .format(valid_error, valid_accuracy))
    finally:
        # Flush pending events and release the event files; the next call
        # opens new writers through open_file_to_write.
        train_writer.close()
        valid_writer.close()

In [15]:
# Experiment: start from an empty graph, build the model with n=2 residual
# blocks per stage, and train for 15 epochs. Logs go under run name '2-layers'.
tf.reset_default_graph()
create_model(2)
run_session('2-layers', 15)

End of epoch 01: err(train)=1.57 acc(train)=0.42, time(train)=1050.05
End of epoch 02: err(train)=1.20 acc(train)=0.57, time(train)=925.04
End of epoch 03: err(train)=1.03 acc(train)=0.63, time(train)=971.18
End of epoch 04: err(train)=0.92 acc(train)=0.68, time(train)=944.71
End of epoch 05: err(train)=0.84 acc(train)=0.70, time(train)=874.41
                 err(valid)=0.85 acc(valid)=0.70
End of epoch 06: err(train)=0.78 acc(train)=0.72, time(train)=865.67
End of epoch 07: err(train)=0.74 acc(train)=0.74, time(train)=958.35
End of epoch 08: err(train)=0.70 acc(train)=0.75, time(train)=952.35
End of epoch 09: err(train)=0.67 acc(train)=0.76, time(train)=938.14
End of epoch 10: err(train)=0.65 acc(train)=0.77, time(train)=827.97
                 err(valid)=0.74 acc(valid)=0.74
End of epoch 11: err(train)=0.62 acc(train)=0.78, time(train)=822.01
End of epoch 12: err(train)=0.60 acc(train)=0.79, time(train)=792.40
End of epoch 13: err(train)=0.59 acc(train)=0.79, time(train)=784.47
End 

In [19]:
# Experiment: n=2 model, 15 epochs, run name '2-layers-0.01'.
# NOTE(review): the '-0.01' suffix presumably records the learning rate, which
# is hard-coded inside create_model rather than passed in here -- confirm.
tf.reset_default_graph()
create_model(2)
run_session('2-layers-0.01', 15)

End of epoch 01: err(train)=1.57 acc(train)=0.42, time(train)=1412.20
End of epoch 02: err(train)=1.22 acc(train)=0.56, time(train)=1354.57
End of epoch 03: err(train)=1.05 acc(train)=0.62, time(train)=1372.44
End of epoch 04: err(train)=0.95 acc(train)=0.66, time(train)=1346.49
End of epoch 05: err(train)=0.87 acc(train)=0.69, time(train)=1562.73
                 err(valid)=0.90 acc(valid)=0.68
End of epoch 06: err(train)=0.81 acc(train)=0.71, time(train)=1484.73
End of epoch 07: err(train)=0.76 acc(train)=0.73, time(train)=1355.07
End of epoch 08: err(train)=0.71 acc(train)=0.75, time(train)=1357.23
End of epoch 09: err(train)=0.69 acc(train)=0.76, time(train)=1356.40
End of epoch 10: err(train)=0.66 acc(train)=0.77, time(train)=1357.75
                 err(valid)=0.74 acc(valid)=0.75
End of epoch 11: err(train)=0.63 acc(train)=0.78, time(train)=1357.50
End of epoch 12: err(train)=0.61 acc(train)=0.78, time(train)=1356.13
End of epoch 13: err(train)=0.59 acc(train)=0.79, time(train)=

In [21]:
# Experiment: n=2 model, 15 epochs, run name '2-layers-0.005'.
# NOTE(review): the '-0.005' suffix presumably records a learning rate, but
# create_model as shown hard-codes learning_rate=0.01; the value must have been
# edited by hand for this run -- confirm.
tf.reset_default_graph()
create_model(2)
run_session('2-layers-0.005', 15)

End of epoch 01: err(train)=1.59 acc(train)=0.41, time(train)=1530.84
End of epoch 02: err(train)=1.20 acc(train)=0.57, time(train)=3636.97
End of epoch 03: err(train)=1.02 acc(train)=0.63, time(train)=1823.44
End of epoch 04: err(train)=0.92 acc(train)=0.67, time(train)=1566.55
End of epoch 05: err(train)=0.85 acc(train)=0.70, time(train)=1633.16
                 err(valid)=0.86 acc(valid)=0.70
End of epoch 06: err(train)=0.78 acc(train)=0.72, time(train)=1554.59
End of epoch 07: err(train)=0.74 acc(train)=0.74, time(train)=1549.78
End of epoch 08: err(train)=0.70 acc(train)=0.75, time(train)=1582.75
End of epoch 09: err(train)=0.66 acc(train)=0.77, time(train)=1630.11
End of epoch 10: err(train)=0.64 acc(train)=0.78, time(train)=1653.05
                 err(valid)=0.71 acc(valid)=0.75
End of epoch 11: err(train)=0.61 acc(train)=0.79, time(train)=1538.21
End of epoch 12: err(train)=0.60 acc(train)=0.79, time(train)=1595.97
End of epoch 13: err(train)=0.57 acc(train)=0.80, time(train)=

In [16]:
# Experiment: start from an empty graph, build the model with n=5 residual
# blocks per stage, and train for 15 epochs. Logs go under run name
# '5-layers-bin'.
tf.reset_default_graph()
create_model(5)
run_session('5-layers-bin', 15)

End of epoch 01: err(train)=1.54 acc(train)=0.43, time(train)=3271.71
End of epoch 02: err(train)=1.12 acc(train)=0.60, time(train)=3213.43
End of epoch 03: err(train)=0.95 acc(train)=0.66, time(train)=3200.79
End of epoch 04: err(train)=0.84 acc(train)=0.70, time(train)=3205.30
End of epoch 05: err(train)=0.75 acc(train)=0.74, time(train)=3203.43
                 err(valid)=0.76 acc(valid)=0.73
End of epoch 06: err(train)=0.69 acc(train)=0.76, time(train)=3213.07
End of epoch 07: err(train)=0.64 acc(train)=0.78, time(train)=3207.26
End of epoch 08: err(train)=0.60 acc(train)=0.79, time(train)=3196.89
End of epoch 09: err(train)=0.56 acc(train)=0.80, time(train)=3219.21
End of epoch 10: err(train)=0.54 acc(train)=0.81, time(train)=3457.45
                 err(valid)=0.64 acc(valid)=0.78
End of epoch 11: err(train)=0.51 acc(train)=0.82, time(train)=3254.48
End of epoch 12: err(train)=0.48 acc(train)=0.83, time(train)=3209.75
End of epoch 13: err(train)=0.46 acc(train)=0.84, time(train)=

In [22]:
# Experiment: n=5 model, 15 epochs, run name '5-layers-0.05-bin'.
# NOTE(review): the run name says 0.05 while the note following this cell says
# learning rate 0.005, and create_model as shown hard-codes 0.01 -- confirm
# which value was actually used.
tf.reset_default_graph()
create_model(5)
run_session('5-layers-0.05-bin', 15)

#learning rate 0.005

End of epoch 01: err(train)=1.57 acc(train)=0.42, time(train)=4768.38
End of epoch 02: err(train)=1.15 acc(train)=0.58, time(train)=3178.44
End of epoch 03: err(train)=0.94 acc(train)=0.67, time(train)=3188.64
End of epoch 04: err(train)=0.83 acc(train)=0.70, time(train)=3180.61
End of epoch 05: err(train)=0.75 acc(train)=0.74, time(train)=3183.27
                 err(valid)=0.77 acc(valid)=0.73
End of epoch 06: err(train)=0.69 acc(train)=0.76, time(train)=3172.53
End of epoch 07: err(train)=0.64 acc(train)=0.78, time(train)=3180.75
End of epoch 08: err(train)=0.60 acc(train)=0.79, time(train)=3179.37
End of epoch 09: err(train)=0.56 acc(train)=0.80, time(train)=3198.07
End of epoch 10: err(train)=0.53 acc(train)=0.82, time(train)=3179.45
                 err(valid)=0.66 acc(valid)=0.77
End of epoch 11: err(train)=0.51 acc(train)=0.82, time(train)=3179.68
End of epoch 12: err(train)=0.48 acc(train)=0.84, time(train)=3177.59
End of epoch 13: err(train)=0.47 acc(train)=0.84, time(train)=