## Multilayers using plain Tensorflow

In [1]:
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.debug("test")

DEBUG:root:test


In [2]:
import sys
mlpdir = '/home/student/Dropbox/msc_Artificial_Intelligence/mlp_Machine_Learning_Practical/mlpractical'
sys.path.append(mlpdir)

In [3]:
import tensorflow as tf

In [4]:
from mlp.data_providers import MNISTDataProvider
from mylibs.jupyter_notebook_helper import show_graph
import tensorflow as tf
import numpy as np
import os
import datetime
import math

In [5]:
# Seed a random number generator
seed = 16011984
rng = np.random.RandomState(seed)

In [6]:
totalTrain = 50000
totalValid = 10000

In [7]:
factor = 1000
batchTrainSize = totalTrain/factor
batchValidSize = totalValid/factor

In [8]:
train_data = MNISTDataProvider('train', batch_size=batchTrainSize, rng=rng, shuffle_order=False) #all 50000
valid_data = MNISTDataProvider('valid', batch_size=batchValidSize, rng=rng, shuffle_order=False) #all 10000

In [9]:
bitArchitecture = tf.float64

In [10]:
#Remember to avoid ReLU functions to become "dead" neurons we should set the bias approximately higher than zero
def bias_variable(shape):
    return tf.Variable(tf.constant(0.1, shape=shape, dtype=bitArchitecture), dtype=bitArchitecture)

In [11]:
def weight_variable(shape):
    outDim = shape[-1]
    #https://www.tensorflow.org/how_tos/variables/
    #http://www.inf.ed.ac.uk/teaching/courses/mlpr/2016/notes/w4b_neural_net_intro.html
    
    #tf.truncated_normal: the tails are not trusted, if you have something above or below two standard deviations, then
    #this sample is not used at all, we just try and pick again from the distribution.
    
    return tf.Variable(tf.truncated_normal(shape = shape, mean=0, stddev=0.1/math.sqrt(outDim),
                                          dtype=bitArchitecture), dtype=bitArchitecture)

In [12]:
def conv2d(x, W):
    #input: [batch, in_height, in_width, in_channels]
    #filter: [filter_height, filter_width, in_channels, out_channels]
    return tf.nn.conv2d(input=x, filter=W, strides=[1,1,1,1], padding='SAME') # zero padding

In [13]:
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

In [14]:
def convert1DdigitTo2DforConv(x):
    #batchTrainSize
    return tf.reshape(x, shape=(-1, 28, 28, 1)) #-1 means calculate it by yourself

In [15]:
graph = tf.Graph() #create new graph

#with graph.as_default(), tf.device('/gpu:0'): 
#with graph.as_default(), tf.device('/cpu:0'):
with graph.as_default(): 
    with tf.name_scope('data'):
        #The shape argument to placeholder is optional, but it allows TensorFlow to automatically catch
        #bugs stemming from inconsistent tensor shapes.
        x = tf.placeholder(bitArchitecture, [None, 784], 'inputs')
        y_ = tf.placeholder(bitArchitecture, [None, 10], 'targets')

    with tf.name_scope('inputs_2D'):
        x_image = convert1DdigitTo2DforConv(x)

    with tf.name_scope('convolution1_vars'):
        W_conv1 = weight_variable((5, 5, 1, 32))
        b_conv1 = bias_variable([32])

    with tf.name_scope('convolution1_and_pooling'):
        h_conv1 = tf.nn.relu( conv2d(x_image, W_conv1) + b_conv1 )
        h_pool1 = max_pool_2x2(h_conv1)

    #So now we have 32 features each with 14 pixels
    #(I thought we did zero padding but instead of 32/2 = 16 we only have 14 pixels each)
    print h_pool1.get_shape()

    #64 features each with 5x5 patch, so now we have 32 channels (it's the depth)
    #it's like we are extracting two features per feature

    with tf.name_scope('convolution2_vars'):
        #filter: [filter_height, filter_width, in_channels, out_channels]
        W_conv2 = weight_variable((5, 5, 32, 64))
        b_conv2 = bias_variable([64])

    with tf.name_scope('convolution2_and_pooling'):
        h_conv2 = tf.nn.relu( conv2d(h_pool1, W_conv2) + b_conv2 )
        h_pool2 = max_pool_2x2(h_conv2)

    print h_pool2.get_shape()

    flattenedShape = 7*7*64

    #-1 means figure it out yourself computer :P
    h_pool2_flat = tf.reshape(h_pool2, shape=(-1, flattenedShape), name='flattening_convolutions')

    with tf.name_scope('affine'):
        W_fc1 = weight_variable((flattenedShape, 1024))
        b_fc1 = bias_variable([1024])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    with tf.name_scope('regularization'):
        keep_prob = tf.placeholder(bitArchitecture)
        h_fc1_prob = tf.nn.dropout(h_fc1, keep_prob)

    with tf.name_scope('readout_layer'):
        W_fc2 = weight_variable([1024, 10])
        b_fc2 = bias_variable([10])
        y_conv = tf.matmul(h_fc1_prob, W_fc2) + b_fc2

    with tf.name_scope('error'):
        per_datapoint_errors = tf.nn.softmax_cross_entropy_with_logits(y_conv, y_)
        error = tf.reduce_mean(per_datapoint_errors)

    with tf.name_scope('accuracy'):
        per_datapoint_pred_is_correct = tf.equal(tf.argmax(y_conv, axis=1), tf.argmax(y_, axis=1))
        accuracy = tf.reduce_mean(tf.cast(per_datapoint_pred_is_correct, bitArchitecture))

    with tf.name_scope('training'):
        train_step = tf.train.GradientDescentOptimizer(learning_rate=0.3).minimize(error)

#     tf.summary.scalar('error', error)
#     tf.summary.scalar('accuracy', accuracy)
#     summary_op = tf.summary.merge_all()
    
    init = tf.global_variables_initializer()

TypeError: DataType float64 for attr 'T' not in list of allowed values: float32, float16

In [12]:
#show_graph(graph_def=graph)

In [16]:
# timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# train_writer = tf.summary.FileWriter(
#     logdir=os.path.join('conv_log', timestamp, 'train'),
#     graph=graph
# )
# valid_writer = tf.summary.FileWriter(
#     os.path.join('conv_log', timestamp, 'valid'),
#     graph=graph
# )

In [17]:
config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
config.gpu_options.allow_growth = True

In [18]:
from time import time

In [None]:
# sess = tf.Session(graph=graph, config=config)
# sess.run(init)

In [19]:
#%%time
start = time()

#sess = tf.InteractiveSession(graph=graph, config=config)
with tf.Session(graph=graph, config=config) as sess:
    sess.run(init)

    num_epoch = 10

    for e in range(num_epoch):
        train_error = 0.
        train_acc = 0.

        valid_error = 0.
        valid_acc = 0.

        step = 0
        for input_batch, target_batch in train_data:
            #_, batch_error, batch_acc, train_summary = sess.run(
                #[train_step, error, accuracy, summary_op],
            _, batch_error, batch_acc = sess.run(
                [train_step, error, accuracy],
                feed_dict={x: input_batch, y_: target_batch, keep_prob: 0.5})
            train_error += batch_error
            train_acc += batch_acc

            #summary
            #train_summary = sess.run(summary_op, feed_dict={inputs: input_batch, targets: target_batch})
            #train_writer.add_summary(train_summary, global_step= e * train_data.num_batches + step)
            step += 1

        if (e+1)%5  == 0:
            step = 0
            for input_batch, target_batch in valid_data:
                #batch_error, batch_acc, valid_summary = sess.run(
                    #[error, accuracy, summary_op],
                batch_error, batch_acc = sess.run(
                    [error, accuracy],
                    feed_dict={x: input_batch, y_: target_batch, keep_prob: 1.}
                )
                valid_error += batch_error
                valid_acc += batch_acc

                #summary
                #valid_summary = sess.run(summary_op, feed_dict={inputs: input_batch, targets: target_batch})
                #print valid_summary
                #valid_writer.add_summary(valid_summary, global_step = e*valid_data.num_batches + step)
                step += 1

        train_error /= train_data.num_batches
        train_acc /= train_data.num_batches

        valid_error /= valid_data.num_batches
        valid_acc /= valid_data.num_batches

        print 'End of epoch %d: train error = %.2f, train accuracy = %.2f, valid error = %.2f, valid accuracy = %.2f'\
            % (e + 1, train_error, train_acc, valid_error, valid_acc)
            
print "total time in minutes: %.3f" % ((time() - start)/60)

End of epoch 1: train error = 1.48, train accuracy = 0.51, valid error = 0.00, valid accuracy = 0.00
End of epoch 2: train error = 0.20, train accuracy = 0.94, valid error = 0.00, valid accuracy = 0.00


KeyboardInterrupt: 