In [0]:
from numpy.random import seed
seed(42)
from tensorflow import set_random_seed
set_random_seed(42)

In [0]:
import os
import numpy as np
import tensorflow as tf

from time import strftime
from PIL import Image

In [0]:
NR_CLASSES = 10
VALIDATION_SIZE = 10000
IMAGE_WIDTH = 28
IMAGE_HEIGHT = 28
CHANNELS = 1
TOTAL_INPUTS = IMAGE_WIDTH*IMAGE_HEIGHT*CHANNELS

### 1) Download mnist dataset and create train and test sets.

In [0]:
from keras.datasets import mnist

(X_train_all, y_train_all), (X_test, y_test) = mnist.load_data()

num_classes = np.unique(y_train_all).shape[0]
print("Shape of training dataset:", X_train_all.shape)
print("Number of training examples:", X_train_all.shape[0])
print("Number of testing examples:", X_test.shape[0])
print("Number of classes:", num_classes)
print("Image shape:", X_train_all[0].shape)
print("Image data type:", X_train_all.dtype)

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
Shape of training dataset: (60000, 28, 28)
Number of training examples: 60000
Number of testing examples: 10000
Number of classes: 10
Image shape: (28, 28)
Image data type: uint8


### 2) Reshape and rescale data: make it between 0 and 1 - neural networs usually work better with this type of data

In [0]:
from sklearn.preprocessing import normalize


# Reshape the data from 3 dimensions to 2
X_train_all = X_train_all.reshape((X_train_all.shape[0],TOTAL_INPUTS))
X_test = X_test.reshape((X_test.shape[0],TOTAL_INPUTS))
# Re-scale the train and test set
X_train_all = X_train_all/255.0
X_test = X_test/ 255.0

y_train_all = np.eye(NR_CLASSES)[y_train_all]
y_test = np.eye(NR_CLASSES)[y_test]

print(y_train_all.shape)
print(y_test.shape)

(60000, 10)
(10000, 10)


### 3) Split the training dataset into a smaller training dataset and a validation dataset for the features and the labels. Create four arrays: x_val, y_val, x_train, and y_train from x_train_all and y_train_all. Use the validation size of 10,000.

In [0]:
X_val = X_train_all[:VALIDATION_SIZE]
y_val = y_train_all[:VALIDATION_SIZE]

X_train = X_train_all[VALIDATION_SIZE:]
y_train = y_train_all[VALIDATION_SIZE:]

print("Training set shape:", X_train.shape)
print("Validation set shape:", X_val.shape)
print("Testing set shape:", X_test.shape)

Training set shape: (50000, 784)
Validation set shape: (10000, 784)
Testing set shape: (10000, 784)


In [0]:
print("Training set shape:", y_train.shape)
print("Validation set shape:", y_val.shape)
print("Testing set shape:", y_test.shape)

Training set shape: (50000, 10)
Validation set shape: (10000, 10)
Testing set shape: (10000, 10)


### 4) Setup Tensorflow Graph

In [0]:
#tf.reset_default_graph()

In [0]:
X = tf.placeholder(tf.float32, shape=[None, TOTAL_INPUTS], name='X')
Y = tf.placeholder(tf.float32, shape=[None, NR_CLASSES], name='labels')

print(X)
print(Y)

Tensor("X:0", shape=(?, 784), dtype=float32)
Tensor("labels:0", shape=(?, 10), dtype=float32)


### 5) Create variables for number of epochs, learning rate and two hidden layers: 512 and 64 neurons

In [0]:
nr_epochs = 16
learning_rate = 0.01

n_hidden1 = 512
n_hidden2 = 64

# Setup of tensorboard on google colab
Tensorboard is the very good way to visualise your data. 

In [0]:
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip

LOG_DIR = './log'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

get_ipython().system_raw('./ngrok http 6006 &')

! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

--2019-11-04 17:19:47--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
Resolving bin.equinox.io (bin.equinox.io)... 3.214.169.236, 35.170.171.200, 52.7.202.148, ...
Connecting to bin.equinox.io (bin.equinox.io)|3.214.169.236|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13773305 (13M) [application/octet-stream]
Saving to: ‘ngrok-stable-linux-amd64.zip’


2019-11-04 17:19:48 (17.7 MB/s) - ‘ngrok-stable-linux-amd64.zip’ saved [13773305/13773305]

Archive:  ngrok-stable-linux-amd64.zip
  inflating: ngrok                   
http://9022b868.ngrok.io


### 6) Create function to proceed one layer in neural network:
You have input, dimension of weight: weight_dim, dimension of bias: bias_dim and name of your layer, return the output layer.
Use truncated normal distribution to generate initial weights and zero-constants for biases. If the name = "out", use softmax, in other cas use relu

In [0]:
def setup_layer(input, weight_dim, bias_dim, name):
    
    with tf.name_scope(name):
        initial_w = tf.truncated_normal(weight_dim, stddev=0.1, seed=42)  
        w = tf.Variable(initial_value=initial_w)  
        
        initial_b = tf.constant(value=0.0, shape=bias_dim)
        b = tf.Variable(initial_value=initial_b) 
        
        layer_in = tf.matmul(input, w) + b
        if name =='out':
          layer_out = tf.nn.softmax(layer_in)
        else :
          layer_out = tf.nn.relu(layer_in)

        tf.summary.histogram('weights', w)
        tf.summary.histogram('biases', b)
        
        return layer_out

### 7) Create neural network with 2 hidden layers, using this function from previous item. Add also one dropout layer to avoid overfitting

In [0]:
layer_1 = setup_layer(X, [TOTAL_INPUTS, n_hidden1], [n_hidden1], 'layer1')
layer_drop = tf.nn.dropout(layer_1, rate=0.2)
layer_2 = setup_layer(layer_drop, [n_hidden1, n_hidden2], [n_hidden2], 'layer2')
output = setup_layer(layer_2, [n_hidden2, NR_CLASSES], [NR_CLASSES], 'out')

model_name = f'{n_hidden1}-DO-{n_hidden2} LR{learning_rate} E{nr_epochs}'

model_name

'512-DO-64 LR0.01 E16'

### 8) For better visualization in TensorBoard we want to use tf.name_scope() to aggregate loss, optimizer, accuracy metrica and performance.

In [0]:
# Defining Loss Function
with tf.name_scope('loss_calc'):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(Y, output))
# Defining Optimizer
with tf.name_scope('optimizer'):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_step = optimizer.minimize(loss)
# Accuracy Metric
with tf.name_scope('accuracy_calc'):
    correct_pred = tf.cast(tf.equal(tf.argmax(output, 1), tf.argmax(Y, 1)), tf.float32)
    accuracy = tf.reduce_mean(correct_pred)

# Add summaries for tensorboard
with tf.name_scope('performance'):
    tf.summary.scalar('accuracy', accuracy)
    tf.summary.scalar('cost', loss)

#Add more summaries for tensorboard: check Input Images in Tensorboard
with tf.name_scope('show_image'):
    x_image = tf.reshape(X, [-1, 28, 28, 1])
    tf.summary.image('image_input', x_image, max_outputs=4)

### 9) Create session using tf.Session(), merge summaries using tf.summary.merge_all(). Use tf.summary.FileWriter() to write you summaries.

In [0]:
#Run Session

sess = tf.Session()

merged_summary = tf.summary.merge_all()

train_writer = tf.summary.FileWriter(LOG_DIR+'/train')
train_writer.add_graph(sess.graph)

validation_writer = tf.summary.FileWriter(LOG_DIR+'/val')

### 10) Initialise all the variables, and run the session, look at the TensorBoard

In [0]:
init = tf.global_variables_initializer()
sess.run(init)

### 11) If you data is quite big, it is usefull to have so-called batches, smaller pieces of data. We have 50000 data points, we want to have batches with 1000 points. Create next_batch function, which gives you the next part of the data

In [0]:
size_of_batch = 1000
num_examples = X_train.shape[0]
nr_iterations = int(num_examples/size_of_batch)

index_in_epoch = 0

In [0]:
def next_batch(batch_size, data, labels):
    
    global num_examples
    global index_in_epoch

    start = index_in_epoch
    index_in_epoch += batch_size
    if index_in_epoch > num_examples:
        start = 0
        index_in_epoch = batch_size
    end = index_in_epoch
    return data[start:end], labels[start:end]

### 12) Run the algorighm: do several so-called epochs - the runs through all the data. In each epoch use 50 batches with 1000 data points. Write information to TensorBoard to investigate later.

In [0]:
nr_epochs = 16

for epoch in range(nr_epochs):
    
    # ============= Training Dataset ============
    for i in range(nr_iterations):
        batch_x, batch_y = next_batch(size_of_batch, X_train, y_train)

        feed_dictionary = {X : batch_x, Y : batch_y}

        sess.run(train_step, feed_dict=feed_dictionary)
    
    train_acc, train_merged_summary = sess.run([accuracy, merged_summary],
                                               feed_dict=feed_dictionary)
    
    # write summary: merged_summary and accuracy to the TensorBoard
    train_writer.add_summary(train_merged_summary, epoch)
    
    print(f'Epoch {epoch} \t| Training Accuracy = {train_acc}')
    
    # ========== Validation Dataset =============
    val_acc, val_merged_summary = sess.run([accuracy, merged_summary],
                                           feed_dict={X: X_val, Y: y_val})
    #add summary for validation data
    validation_writer.add_summary(val_merged_summary, epoch)
    print(f'Epoch {epoch} \t| Validation Accuracy = {val_acc}')

print('Done training!')

Epoch 0 	| Training Accuracy = 0.7839999794960022
Epoch 0 	| Validation Accuracy = 0.7651000022888184
Epoch 1 	| Training Accuracy = 0.9570000171661377
Epoch 1 	| Validation Accuracy = 0.9406999945640564
Epoch 2 	| Training Accuracy = 0.9729999899864197
Epoch 2 	| Validation Accuracy = 0.953499972820282
Epoch 3 	| Training Accuracy = 0.9729999899864197
Epoch 3 	| Validation Accuracy = 0.9538999795913696
Epoch 4 	| Training Accuracy = 0.9710000157356262
Epoch 4 	| Validation Accuracy = 0.9595999717712402
Epoch 5 	| Training Accuracy = 0.9739999771118164
Epoch 5 	| Validation Accuracy = 0.9585999846458435
Epoch 6 	| Training Accuracy = 0.9800000190734863
Epoch 6 	| Validation Accuracy = 0.9598000049591064
Epoch 7 	| Training Accuracy = 0.9750000238418579
Epoch 7 	| Validation Accuracy = 0.9627000093460083
Epoch 8 	| Training Accuracy = 0.9819999933242798
Epoch 8 	| Validation Accuracy = 0.9623000025749207
Epoch 9 	| Training Accuracy = 0.9739999771118164
Epoch 9 	| Validation Accuracy = 

### 13) Calculate the accuracy over the test dataset (x_test and y_test). Use your knowledge of running a session to get the accuracy. Display the accuracy as a percentage rounded to two decimal numbers.

In [0]:
sess.run(train_step, feed_dict={X: X_test, Y: y_test})
test_accuracy = sess.run(accuracy, feed_dict={X: X_test, Y: y_test})

print("Testing Accuracy: {0:.2f}".format(test_accuracy*100))

Testing Accuracy: 96.25


14) IMPORATANT: Reset for the Next Run

In [0]:
# Reset for the Next Run

train_writer.close()
validation_writer.close()
sess.close()
tf.reset_default_graph()

# Keras Implementation

Let's now dive into the implementation of our first neural network.
Our network is a simple neural network, **without convolution operations**.

We make use of the **sequential paradigm** of Tensorflow, made to build models by plugging together building blocks. This interface allows for easier code writing, while Tensorflow also offers alternative ways to write more complex deep learning algorithms through the use of its **define-by-run interface**.

The network's structure is the following :
 - A **flatten** layer, used to vectorize the whole input batch of data
 - A **dense** layer, transforming the 28x28=784 input data to a 512 vector, using a rectified linear unit activation function
 - A **dropout** layer, ensuring the network does not overfit the training data by giving each of its neuron a 20% chance not to be activated at each stage
 - A **dense** layer, outputing a 10 vector using a softmax function

The optimizer we use at first is named **Adam**, because it requires very little parameter tuning.

We use the sparse categorical crossentropy loss function because each sample of our data belongs to exactly one class (i.e. each handwritten digit represents only one specific digit).

We also use the **accuracy** metric, which is basically the percentage of correct predictions our network computes.

We will then train this neural network for **5 epochs** (i.e. on the whole dataset five times), and then test it on the testing set

In [0]:
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam

In [0]:
# Let's implement the network first
model = tf.keras.models.Sequential([Flatten(input_shape=X_train.shape[1:]),   # Flatten layer
                                    Dense(n_hidden1, activation='relu'),      # Dense layer
                                    Dropout(0.2),                             # Dropout layer
                                    Dense(NR_CLASSES, activation='softmax')   # Dense layer
                                  ])

# Then choose the optimizer, loss function, and metric, as compilation parameters
model.compile(loss=categorical_crossentropy, optimizer=Adam(), metrics=['accuracy'])

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [0]:
# Train the model we just built
model.fit(X_train, y_train, validation_data=(X_val, y_val),
          epochs=5, verbose=2)

Train on 50000 samples, validate on 10000 samples
Epoch 1/5
50000/50000 - 11s - loss: 0.2368 - acc: 0.9289 - val_loss: 0.1244 - val_acc: 0.9624
Epoch 2/5
50000/50000 - 11s - loss: 0.1039 - acc: 0.9681 - val_loss: 0.0935 - val_acc: 0.9696
Epoch 3/5
50000/50000 - 11s - loss: 0.0726 - acc: 0.9778 - val_loss: 0.0829 - val_acc: 0.9748
Epoch 4/5
50000/50000 - 9s - loss: 0.0567 - acc: 0.9812 - val_loss: 0.0796 - val_acc: 0.9762
Epoch 5/5
50000/50000 - 9s - loss: 0.0448 - acc: 0.9860 - val_loss: 0.0721 - val_acc: 0.9773


<tensorflow.python.keras.callbacks.History at 0x7fdab52abc50>

In [0]:
# And evaluate its performances on the testing set
pred_loss, pred_acc = model.evaluate(X_test, y_test)

# Now finally print the value of the loss and metric functions specified above
print("The loss on the test set is:", pred_loss)
print("The accuracy on the test set is:", pred_acc)

The loss on the test set is: 0.0744911439513322
The accuracy on the test set is: 0.9789
