In [1]:
#Module we will use down the road
from __future__ import print_function
import numpy as np
import tensorflow as tf
import pickle

In [2]:
#Load the data
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  print('Training set', train_dataset.shape, train_labels.shape)
  print('Validation set', valid_dataset.shape, valid_labels.shape)
  print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


In [3]:
#Reformat the data

image_size = 28
num_labels = 10

def reformat(dataset, labels):
  dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
  # Map 0 to [1.0, 0.0, 0.0 ...], 1 to [0.0, 1.0, 0.0 ...]
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels

train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 784) (200000, 10)
Validation set (10000, 784) (10000, 10)
Test set (10000, 784) (10000, 10)


In [4]:
def accuracy(predictions, labels):
    return 100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))/len(predictions)

In [5]:
batch_size = 150
Lambda = 1e-5 #5e-5 in  convolution.py
graph = tf.Graph()
with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size*image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape = (batch_size, num_labels))
    tf_validation_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    #Variables
    weights = tf.Variable(tf.truncated_normal([image_size*image_size, num_labels]))
    biases = tf.Variable(tf.zeros([num_labels]))
    
    #Training
    logits = tf.matmul(tf_train_dataset, weights) + biases
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels = tf_train_labels, logits= logits)) + \
                                        (Lambda/(2*batch_size))*tf.nn.l2_loss(weights)
    
    #Optimization
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    
    #Predictions
    train_predictions = tf.nn.softmax(logits)
    valid_predictions = tf.nn.softmax(tf.matmul(tf_validation_dataset, weights) + biases)
    test_predictions = tf.nn.softmax(tf.matmul(tf_test_dataset, weights) + biases)

In [6]:
num_steps = 3001
validation_accuracy = []
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')
    for step in np.arange(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_predictions], feed_dict = feed_dict)
        if step%500 == 0:
            print('Loss at step {0} : {1}'.format(step, l))
            print('Training accuracy : %.1f'%accuracy(predictions, batch_labels))
            valid_accuracy = accuracy(valid_predictions.eval(), valid_labels)
            validation_accuracy.append(valid_accuracy)
            print('Validation accuracy : %.1f'%valid_accuracy)
    print('Test accuracy : %.1f' % accuracy(test_predictions.eval(), test_labels))

Initialized
Loss at step 0 : 14.6592378616333
Training accuracy : 10.7
Validation accuracy : 13.2
Loss at step 500 : 1.4467601776123047
Training accuracy : 79.3
Validation accuracy : 75.5
Loss at step 1000 : 1.6460164785385132
Training accuracy : 70.0
Validation accuracy : 76.7
Loss at step 1500 : 1.3156633377075195
Training accuracy : 70.0
Validation accuracy : 76.9
Loss at step 2000 : 0.9650393724441528
Training accuracy : 80.0
Validation accuracy : 77.4
Loss at step 2500 : 0.9357032179832458
Training accuracy : 78.7
Validation accuracy : 78.6
Loss at step 3000 : 0.8860827684402466
Training accuracy : 77.3
Validation accuracy : 79.0
Test accuracy : 86.5


### L2 regularization
Notice in convolution.py they even regularized biases

For me I stick with the old regulariation term keep dividing with the batch size (?)

In [7]:
batch_size = 128
hidden_units = 1024
Lambda = 1e-5
graph = tf.Graph()
with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size*image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_validation_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    #Variables
    weights1 = tf.Variable(tf.truncated_normal([image_size*image_size, hidden_units]))
    biases1 = tf.Variable(tf.zeros([hidden_units]))
    weights2 = tf.Variable(tf.truncated_normal([hidden_units, num_labels]))
    biases2 =tf.Variable(tf.zeros([num_labels]))
    
    #Training
    hidden1 = tf.nn.relu(tf.matmul(tf_train_dataset, weights1) + biases1)
    logits = tf.matmul(hidden1, weights2) + biases2
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = tf_train_labels, logits = logits))\
    + (Lambda/(2*batch_size))*(tf.nn.l2_loss(weights1) +  tf.nn.l2_loss(weights2))
    
    #Optimization
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    
    #Predictions
    train_predictions = tf.nn.softmax(logits)
    hidden_valid = tf.nn.relu(tf.matmul(tf_validation_dataset, weights1) + biases1)
    valid_predictions = tf.nn.softmax(tf.matmul(hidden_valid, weights2) + biases2)
    hidden_test = tf.nn.relu(tf.matmul(tf_test_dataset, weights1) + biases1)
    test_predictions = tf.nn.softmax(tf.matmul(hidden_test, weights2) + biases2)

In [8]:
#train
num_steps = 3001
with tf.Session(graph = graph) as session:
    tf.global_variables_initializer().run()
    print("Initialized")
    for step in np.arange(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0]- batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset:batch_data, tf_train_labels:batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_predictions], feed_dict = feed_dict)
        if step%500 == 0:
            print("Minibatch loss at {0}: {1}".format(step, l))
            print("Minibatch accuray : %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(
                        valid_predictions.eval(), valid_labels))
    print("Test accuracy: %.1f%%" % accuracy(test_predictions.eval(), test_labels))

Initialized
Minibatch loss at 0: 314.1051940917969
Minibatch accuray : 9.4%
Validation accuracy: 29.3%
Minibatch loss at 500: 18.478822708129883
Minibatch accuray : 85.9%
Validation accuracy: 80.4%
Minibatch loss at 1000: 15.604433059692383
Minibatch accuray : 74.2%
Validation accuracy: 80.6%
Minibatch loss at 1500: 5.6252827644348145
Minibatch accuray : 86.7%
Validation accuracy: 80.5%
Minibatch loss at 2000: 3.5708141326904297
Minibatch accuray : 85.2%
Validation accuracy: 81.8%
Minibatch loss at 2500: 3.611917018890381
Minibatch accuray : 85.2%
Validation accuracy: 82.1%
Minibatch loss at 3000: 2.2162792682647705
Minibatch accuray : 83.6%
Validation accuracy: 82.5%
Test accuracy: 89.4%


#### Problem2: Extreme Case of overfitting 

In [9]:
#Problem2
batch_size = 128
hidden_units = 1024
Lambda = 1e-5
graph = tf.Graph()
with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size*image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_validation_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    #Variables
    weights1 = tf.Variable(tf.truncated_normal([image_size*image_size, hidden_units]))
    biases1 = tf.Variable(tf.zeros([hidden_units]))
    weights2 = tf.Variable(tf.truncated_normal([hidden_units, num_labels]))
    biases2 =tf.Variable(tf.zeros([num_labels]))
    
    #Training
    hidden1 = tf.nn.relu(tf.matmul(tf_train_dataset, weights1) + biases1)
    logits = tf.matmul(hidden1, weights2) + biases2
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = tf_train_labels, logits = logits))\
    + (Lambda/(2*batch_size))*(tf.nn.l2_loss(weights1) +  tf.nn.l2_loss(weights2))
    
    #Optimization
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    
    #Predictions
    train_predictions = tf.nn.softmax(logits)
    hidden_valid = tf.nn.relu(tf.matmul(tf_validation_dataset, weights1) + biases1)
    valid_predictions = tf.nn.softmax(tf.matmul(hidden_valid, weights2) + biases2)
    hidden_test = tf.nn.relu(tf.matmul(tf_test_dataset, weights1) + biases1)
    test_predictions = tf.nn.softmax(tf.matmul(hidden_test, weights2) + biases2)

In [10]:
#train
num_steps = 3001
with tf.Session(graph = graph) as session:
    tf.global_variables_initializer().run()
    print("Initialized")
    for step in np.arange(num_steps):
        offset = np.random.choice(np.arange(10))
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset:batch_data, tf_train_labels:batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_predictions], feed_dict = feed_dict)
        if step%500 == 0:
            print("Minibatch loss at {0}: {1}".format(step, l))
            print("Minibatch accuray : %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(
                        valid_predictions.eval(), valid_labels))
    print("Test accuracy: %.1f%%" % accuracy(test_predictions.eval(), test_labels))

Initialized
Minibatch loss at 0: 452.1272277832031
Minibatch accuray : 5.5%
Validation accuracy: 32.0%
Minibatch loss at 500: 0.012293349951505661
Minibatch accuray : 100.0%
Validation accuracy: 62.8%
Minibatch loss at 1000: 0.012293349951505661
Minibatch accuray : 100.0%
Validation accuracy: 62.8%
Minibatch loss at 1500: 0.012293349951505661
Minibatch accuray : 100.0%
Validation accuracy: 62.8%
Minibatch loss at 2000: 0.012293349951505661
Minibatch accuray : 100.0%
Validation accuracy: 62.8%
Minibatch loss at 2500: 0.012293349951505661
Minibatch accuray : 100.0%
Validation accuracy: 62.8%
Minibatch loss at 3000: 0.012293349951505661
Minibatch accuray : 100.0%
Validation accuracy: 62.8%
Test accuracy: 69.3%


### Trying to adress previous overfitting with dropout

In [11]:
#Dropout
hidden_units = 1024
batch_size = 128
Lambda = 1e-5
graph = tf.Graph()
with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size*image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape = (batch_size, num_labels))
    keep_prob = tf.placeholder(tf.float32)
    tf_validation_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    #Variables
    weights1 = tf.Variable(tf.truncated_normal([image_size*image_size, hidden_units]))
    biases1 = tf.Variable(tf.zeros([hidden_units]))
    weights2 = tf.Variable(tf.truncated_normal([hidden_units, num_labels]))
    biases2 = tf.Variable(tf.zeros([num_labels]))
    
    #Training
    hidden1 = tf.nn.relu(tf.matmul(tf_train_dataset, weights1) + biases1)
    hidden1_drop = tf.nn.dropout(hidden1, keep_prob)
    logits = tf.matmul(hidden1_drop, weights2) + biases2
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = tf_train_labels, logits = logits))\
    + (Lambda/(2*batch_size))*(tf.nn.l2_loss(weights1) + tf.nn.l2_loss(weights2))
    
    #Optimization
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    
    #Predictions
    train_predictions = tf.nn.softmax(logits)
    hidden_valid = tf.nn.relu(tf.matmul(tf_validation_dataset, weights1) + biases1)
    hidden_valid_drop = tf.nn.dropout(hidden_valid, 1.0)
    valid_predictions = tf.nn.softmax(tf.matmul(hidden_valid_drop, weights2) + biases2)
    hidden_test = tf.nn.relu(tf.matmul(tf_test_dataset, weights1) + biases1)
    hidden_test_drop = tf.nn.dropout(hidden_test, 1.0)
    test_predictions = tf.nn.softmax(tf.matmul(hidden_test_drop, weights2) + biases2)

In [12]:
num_steps = 3001
with tf.Session(graph = graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')
    for step in np.arange(num_steps):
        offset = np.random.choice(np.arange(10))
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset:batch_data, tf_train_labels:batch_labels, keep_prob:0.5}
        _, l, predictions = session.run([optimizer, loss, train_predictions], feed_dict=feed_dict)
        if step%500 == 0:
            print('Minibatch error at step {0}:{1}'.format(step, l))
            print('Minibatch accuracy : %.1f'%accuracy(predictions, batch_labels))
            print('Validation accuracy : %1.f'%accuracy(valid_predictions.eval(), valid_labels))
    print('Test accuracy : %.1f'%accuracy(test_predictions.eval(), test_labels))

Initialized
Minibatch error at step 0:539.4149780273438
Minibatch accuracy : 13.3
Validation accuracy : 24
Minibatch error at step 500:0.012312044389545918
Minibatch accuracy : 100.0
Validation accuracy : 70
Minibatch error at step 1000:0.01231517642736435
Minibatch accuracy : 100.0
Validation accuracy : 70
Minibatch error at step 1500:0.012316519394516945
Minibatch accuracy : 100.0
Validation accuracy : 70
Minibatch error at step 2000:0.012318098917603493
Minibatch accuracy : 100.0
Validation accuracy : 69
Minibatch error at step 2500:0.01231920626014471
Minibatch accuracy : 100.0
Validation accuracy : 70
Minibatch error at step 3000:0.012319641187787056
Minibatch accuracy : 100.0
Validation accuracy : 71
Test accuracy : 78.0


### <b>Exponential rate decay </b> with a Neural Network

In [13]:
batch_size = 128
hidden_units = 1024
Lambda = 1e-5
graph = tf.Graph()
with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size*image_size))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_validation_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    #Variables
    weights1 = tf.Variable(tf.truncated_normal([image_size*image_size, hidden_units]))
    biases1 = tf.Variable(tf.zeros([hidden_units]))
    weights2 = tf.Variable(tf.truncated_normal([hidden_units, num_labels]))
    biases2 =tf.Variable(tf.zeros([num_labels]))
    
    #Training
    hidden1 = tf.nn.relu(tf.matmul(tf_train_dataset, weights1) + biases1)
    logits = tf.matmul(hidden1, weights2) + biases2
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = tf_train_labels, logits = logits))\
    + (Lambda/(2*batch_size))*(tf.nn.l2_loss(weights1) +  tf.nn.l2_loss(weights2))
    
    #Optimization
    global_step = tf.Variable(0, trainable = False)
    learning_rate = tf.train.exponential_decay(0.7, global_step, 100000, 0.96, staircase = True)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step = global_step)
    
    #Predictions
    train_predictions = tf.nn.softmax(logits)
    hidden_valid = tf.nn.relu(tf.matmul(tf_validation_dataset, weights1) + biases1)
    valid_predictions = tf.nn.softmax(tf.matmul(hidden_valid, weights2) + biases2)
    hidden_test = tf.nn.relu(tf.matmul(tf_test_dataset, weights1) + biases1)
    test_predictions = tf.nn.softmax(tf.matmul(hidden_test, weights2) + biases2)

In [14]:
#train
num_steps = 3001
with tf.Session(graph = graph) as session:
    tf.global_variables_initializer().run()
    print("Initialized")
    for step in np.arange(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0]- batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset:batch_data, tf_train_labels:batch_labels}
        _, l, predictions = session.run([optimizer, loss, train_predictions], feed_dict = feed_dict)
        if step%500 == 0:
            print("Minibatch loss at {0}: {1}".format(step, l))
            print("Minibatch accuray : %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(
                        valid_predictions.eval(), valid_labels))
    print("Test accuracy: %.1f%%" % accuracy(test_predictions.eval(), test_labels))

Initialized
Minibatch loss at 0: 320.52069091796875
Minibatch accuray : 7.8%
Validation accuracy: 28.6%
Minibatch loss at 500: 29.074594497680664
Minibatch accuray : 72.7%
Validation accuracy: 77.2%
Minibatch loss at 1000: 10.491528511047363
Minibatch accuray : 75.8%
Validation accuracy: 80.4%
Minibatch loss at 1500: 6.758317470550537
Minibatch accuray : 86.7%
Validation accuracy: 80.4%
Minibatch loss at 2000: 1.0255519151687622
Minibatch accuray : 86.7%
Validation accuracy: 81.4%
Minibatch loss at 2500: 1.9318013191223145
Minibatch accuray : 80.5%
Validation accuracy: 81.8%
Minibatch loss at 3000: 1.5358901023864746
Minibatch accuray : 78.9%
Validation accuracy: 81.6%
Test accuracy: 89.1%


### Deep Neural Network with regularization

1. I can use dropout with L2 regularization : seems to reduce overfitting
2. what about dropout + L2 regularization + learning rate decay
3. Train a Neural Network with dropout after the last hidden layer