Deep Learning
=============

Assignment 3
------------

Previously in `2_fullyconnected.ipynb`, you trained a logistic regression and a neural network model.

The goal of this assignment is to explore regularization techniques.

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle

First reload the data we generated in _notmist.ipynb_.

In [3]:
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
  save = pickle.load(f,encoding='latin1')
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  print('Training set', train_dataset.shape, train_labels.shape)
  print('Validation set', valid_dataset.shape, valid_labels.shape)
  print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


Reformat into a shape that's more adapted to the models we're going to train:
- data as a flat matrix,
- labels as float 1-hot encodings.

In [4]:
image_size = 28
num_labels = 10

def reformat(dataset, labels):
  dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
  # Map 1 to [0.0, 1.0, 0.0 ...], 2 to [0.0, 0.0, 1.0 ...]
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 784) (200000, 10)
Validation set (10000, 784) (10000, 10)
Test set (10000, 784) (10000, 10)


In [5]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

---
Problem 1
---------

Introduce and tune L2 regularization for both logistic and neural network models. Remember that L2 amounts to adding a penalty on the norm of the weights to the loss. In TensorFlow, you can compute the L2 loss for a tensor `t` using `nn.l2_loss(t)`. The right amount of regularization should improve your validation / test accuracy.

---

In [67]:
def paras_train(x,y):
    # Variables.
    weights = tf.Variable(tf.truncated_normal([x, y]))
    biases = tf.Variable(tf.zeros([y]))
    return weights,biases

In [162]:
def logistic_function(num_steps,num_batches, x, y, l2_weights,batch_size,valid_dataset,test_dataset):
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, x))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, y))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Variables.
        weights, biases = paras_train(x, y)

        # Training computation.
        logits = tf.matmul(tf_train_dataset, weights) + biases
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
        l2_loss = tf.nn.l2_loss(weights)
        sum_loss = loss + l2_loss*l2_weights

        # Optimizer.
        optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(sum_loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(
            tf.matmul(tf_valid_dataset, weights) + biases)
        test_prediction = tf.nn.softmax(
            tf.matmul(tf_test_dataset, weights) + biases)
    
    

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            # Pick an offset within the training data, which has been randomized.
            # Note: we could use better randomization across epochs.
            if(num_batches != None):
                offset = step % num_batches
            else:
                offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            # Generate a minibatch.
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            # Prepare a dictionary telling the session where to feed the minibatch.
            # The key of the dictionary is the placeholder node of the graph to be fed,
            # and the value is the numpy array to feed to it.
            feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
            if (step % (num_steps/5) == 0):
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" % accuracy(
                        valid_prediction.eval(), valid_labels))
        print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))


In [164]:
def nn_function(num_steps, num_batches, x, y, hidden_layer_nodes, l2_weights, batch_size,valid_dataset,test_dataset): 
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, x))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, y))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

        # Variables.
        weights_1, biases_1 = paras_train(x, hidden_layer_nodes)
        weights_2, biases_2 = paras_train(hidden_layer_nodes,y)

        # Training computation.
        logits_1 = tf.matmul(tf_train_dataset, weights_1) + biases_1
        tf_train_dataset_2 = tf.nn.relu(logits_1)
        logits_2 = tf.matmul(tf_train_dataset_2, weights_2) + biases_2
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits_2, tf_train_labels))
        l2_loss = tf.nn.l2_loss(weights_1) + tf.nn.l2_loss(weights_2)
        sum_loss = loss + l2_loss*l2_weights

        # Optimizer.
        optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(sum_loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits_2)
        valid_prediction = tf.nn.softmax(
            tf.matmul(tf.nn.relu(tf.matmul(tf_valid_dataset, weights_1) + biases_1), weights_2) + biases_2)
        test_prediction = tf.nn.softmax(
            tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset,  weights_1) + biases_1), weights_2) + biases_2)
    
    
    
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            # Pick an offset within the training data, which has been randomized.
            # Note: we could use better randomization across epochs.
            if(num_batches != None):
                offset = step % num_batches
            else:
                offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            # Generate a minibatch.
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            # Prepare a dictionary telling the session where to feed the minibatch.
            # The key of the dictionary is the placeholder node of the graph to be fed,
            # and the value is the numpy array to feed to it.
            feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
            _, l, predictions = session.run(
                [optimizer, sum_loss, train_prediction], feed_dict=feed_dict)
            if (step % (num_steps/5) == 0):
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" % accuracy(
                        valid_prediction.eval(), valid_labels))
        print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

    

In [160]:
x = image_size*image_size
y = num_labels
batch_size = 128
l2_weights = 1e-3
num_steps = 3001
num_batches = None
logistic_function(num_steps,num_batches, x, y, l2_weights,batch_size,valid_dataset,test_dataset)  


Initialized
Minibatch loss at step 0: 15.271442
Minibatch accuracy: 9.4%
Validation accuracy: 13.0%
Minibatch loss at step 500: 1.170235
Minibatch accuracy: 78.9%
Validation accuracy: 76.3%
Minibatch loss at step 1000: 1.081265
Minibatch accuracy: 76.6%
Validation accuracy: 78.3%
Minibatch loss at step 1500: 0.566859
Minibatch accuracy: 83.6%
Validation accuracy: 79.8%
Minibatch loss at step 2000: 0.598325
Minibatch accuracy: 89.1%
Validation accuracy: 80.9%
Minibatch loss at step 2500: 0.700691
Minibatch accuracy: 79.7%
Validation accuracy: 81.3%
Minibatch loss at step 3000: 0.691002
Minibatch accuracy: 82.8%
Validation accuracy: 81.8%
Test accuracy: 88.9%


In [161]:
x = image_size*image_size
y = num_labels
batch_size = 128
l2_weights = 1e-3
hidden_layer_nodes = 1024
num_steps = 3001
num_batches = None
nn_function(num_steps,num_batches, x, y, hidden_layer_nodes, l2_weights,batch_size,valid_dataset,test_dataset)

Initialized
Minibatch loss at step 0: 721.238892
Minibatch accuracy: 7.0%
Validation accuracy: 33.4%
Minibatch loss at step 500: 193.538162
Minibatch accuracy: 86.7%
Validation accuracy: 79.7%
Minibatch loss at step 1000: 114.826401
Minibatch accuracy: 80.5%
Validation accuracy: 82.1%
Minibatch loss at step 1500: 68.734932
Minibatch accuracy: 89.1%
Validation accuracy: 83.3%
Minibatch loss at step 2000: 41.165405
Minibatch accuracy: 93.8%
Validation accuracy: 84.8%
Minibatch loss at step 2500: 25.204725
Minibatch accuracy: 86.7%
Validation accuracy: 85.8%
Minibatch loss at step 3000: 15.432253
Minibatch accuracy: 89.1%
Validation accuracy: 86.3%
Test accuracy: 92.8%


**Better than 86.7% previous result**

---
Problem 2
---------
Let's demonstrate an extreme case of overfitting. Restrict your training data to just a few batches. What happens?

---

In [165]:
x = image_size*image_size
y = num_labels
batch_size = 128
l2_weights = 1e-3
num_steps = 150
num_batches = 5
logistic_function(num_steps,num_batches, x, y, l2_weights,batch_size,valid_dataset,test_dataset)  

Initialized
Minibatch loss at step 0: 15.567070
Minibatch accuracy: 10.9%
Validation accuracy: 13.5%
Minibatch loss at step 30: 0.619145
Minibatch accuracy: 90.6%
Validation accuracy: 41.8%
Minibatch loss at step 60: 0.130376
Minibatch accuracy: 99.2%
Validation accuracy: 45.8%
Minibatch loss at step 90: 0.039852
Minibatch accuracy: 100.0%
Validation accuracy: 47.3%
Minibatch loss at step 120: 0.026610
Minibatch accuracy: 100.0%
Validation accuracy: 48.2%
Test accuracy: 53.4%


In [166]:
x = image_size*image_size
y = num_labels
batch_size = 20
l2_weights = 1e-3
hidden_layer_nodes = 1024
num_steps = 150
num_batches = 5
nn_function(num_steps,num_batches, x, y, hidden_layer_nodes, l2_weights,batch_size,valid_dataset,test_dataset)

Initialized
Minibatch loss at step 0: 833.989868
Minibatch accuracy: 5.0%
Validation accuracy: 16.4%
Minibatch loss at step 30: 310.087341
Minibatch accuracy: 100.0%
Validation accuracy: 26.5%
Minibatch loss at step 60: 300.920654
Minibatch accuracy: 100.0%
Validation accuracy: 26.5%
Minibatch loss at step 90: 292.024933
Minibatch accuracy: 100.0%
Validation accuracy: 26.5%
Minibatch loss at step 120: 283.392273
Minibatch accuracy: 100.0%
Validation accuracy: 26.6%
Test accuracy: 27.6%


## It is overfitting, train accuracy reach 100% but test are low

---
Problem 3
---------
Introduce Dropout on the hidden layer of the neural network. Remember: Dropout should only be introduced during training, not evaluation, otherwise your evaluation results would be stochastic as well. TensorFlow provides `nn.dropout()` for that, but you have to make sure it's only inserted during training.

What happens to our extreme overfitting case?

---

In [171]:
def nn_dropout_function(num_steps,num_batches, x, y, hidden_layer_nodes, l2_weights, batch_size,valid_dataset,test_dataset): 
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, x))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, y))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        
        # Dropout
        dropout = tf.placeholder(tf.float32)

        # Variables.
        weights_1, biases_1 = paras_train(x, hidden_layer_nodes)
        weights_2, biases_2 = paras_train(hidden_layer_nodes,y)

        # Training computation.
        logits_1 = tf.matmul(tf_train_dataset, weights_1) + biases_1        
        ## Add dropout
        tf_train_dataset_2 = tf.nn.dropout(tf.nn.relu(logits_1),dropout)
        logits_2 = tf.matmul(tf_train_dataset_2, weights_2) + biases_2
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits_2, tf_train_labels))
        l2_loss = tf.nn.l2_loss(weights_1) + tf.nn.l2_loss(weights_2)
        sum_loss = loss + l2_loss*l2_weights

        # Optimizer.
        optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(sum_loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits_2)
        valid_prediction = tf.nn.softmax(
            tf.matmul(tf.nn.relu(tf.matmul(tf_valid_dataset, weights_1) + biases_1), weights_2) + biases_2)
        test_prediction = tf.nn.softmax(
            tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset,  weights_1) + biases_1), weights_2) + biases_2)
    
    
    
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            # Pick an offset within the training data, which has been randomized.
            # Note: we could use better randomization across epochs.
            if(num_batches != None):
                offset = step % num_batches
            else:
                offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            # Generate a minibatch.
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            # Prepare a dictionary telling the session where to feed the minibatch.
            # The key of the dictionary is the placeholder node of the graph to be fed,
            # and the value is the numpy array to feed to it.
            feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels, dropout: 0.5}
            _, l, predictions = session.run(
                [optimizer, sum_loss, train_prediction], feed_dict=feed_dict)
            if (step % (num_steps/5) == 0):
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" % accuracy(
                        valid_prediction.eval(), valid_labels))
        print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

    

In [173]:
x = image_size*image_size
y = num_labels
batch_size = 128
l2_weights = 1e-3
hidden_layer_nodes = 1024
num_steps = 150
num_batches = 10
print("using batch number %d" %(num_batches))
nn_function(num_steps,num_batches, x, y, hidden_layer_nodes, l2_weights,batch_size,valid_dataset,test_dataset)
nn_dropout_function(num_steps,num_batches, x, y, hidden_layer_nodes, l2_weights, batch_size,valid_dataset,test_dataset)

using batch number 10
Initialized
Minibatch loss at step 0: 593.364563
Minibatch accuracy: 6.2%
Validation accuracy: 35.1%
Minibatch loss at step 30: 305.212494
Minibatch accuracy: 100.0%
Validation accuracy: 70.0%
Minibatch loss at step 60: 296.189941
Minibatch accuracy: 100.0%
Validation accuracy: 70.0%
Minibatch loss at step 90: 287.433960
Minibatch accuracy: 100.0%
Validation accuracy: 70.0%
Minibatch loss at step 120: 278.937012
Minibatch accuracy: 100.0%
Validation accuracy: 70.0%
Test accuracy: 76.4%
Initialized
Minibatch loss at step 0: 730.753784
Minibatch accuracy: 14.1%
Validation accuracy: 26.0%
Minibatch loss at step 30: 305.395996
Minibatch accuracy: 100.0%
Validation accuracy: 68.2%
Minibatch loss at step 60: 296.433899
Minibatch accuracy: 100.0%
Validation accuracy: 70.3%
Minibatch loss at step 90: 287.715881
Minibatch accuracy: 100.0%
Validation accuracy: 70.1%
Minibatch loss at step 120: 279.233826
Minibatch accuracy: 100.0%
Validation accuracy: 70.4%
Test accuracy: 7

## The dropout can improve the test accuracy

---
Problem 4
---------

Try to get the best performance you can using a multi-layer model! The best reported test accuracy using a deep network is [97.1%](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html?showComment=1391023266211#c8758720086795711595).

One avenue you can explore is to add multiple layers.

Another one is to use learning rate decay:

    global_step = tf.Variable(0)  # count the number of steps taken.
    learning_rate = tf.train.exponential_decay(0.5, global_step, ...)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
 
 ---


In [208]:
def multilayer_function(x, y, hidden_layer_nodes,layer_number,dataset):
    weights = [0]*layer_number
    biases =  [0]*layer_number
    l2_loss = 0
    for i in range(layer_number):
        if i == 0:
            weights[i], biases[i] = paras_train(x, hidden_layer_nodes)
            logits = tf.matmul(dataset, weights[i]) + biases[i]
        elif i == layer_number -1:
            weights[i], biases[i] = paras_train(hidden_layer_nodes,y)
            logits = tf.matmul(tf.nn.relu(logits), weights[i]) + biases[i]
        
        else:
            weights[i], biases[i] = paras_train(hidden_layer_nodes,hidden_layer_nodes)
            logits = tf.matmul(tf.nn.relu(logits), weights[i]) + biases[i]
        l2_loss += tf.nn.l2_loss(weights[i])
    return weights,biases,logits,l2_loss
    

In [212]:
def multilayer_prediction(dataset,weights,biases):
    for i in range(len(weights)):
        if i == 0:
            a = tf.nn.relu(tf.matmul(dataset, weights[i]) + biases[i])
        elif i == len(weights)-1:
            a = tf.nn.softmax(tf.matmul(a, weights[i]) + biases[i])
        else:
            a = tf.nn.relu(tf.matmul(a, weights[i]) + biases[i])
        
    return a

In [215]:
def nn_multilayer_function(num_steps,num_batches, x, y, hidden_layer_nodes,layer_number, l2_weights, batch_size,valid_dataset,test_dataset): 
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, x))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, y))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        
        # Variables.
        weights,biases,logits,l2_loss = multilayer_function(x, y, hidden_layer_nodes,layer_number,tf_train_dataset)

        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
        
        sum_loss = loss + l2_loss*l2_weights

        # Optimizer.
        global_step = tf.Variable(0)  # count the number of steps taken.
        learning_rate = tf.train.exponential_decay(0.5, global_step, 100000, 0.96)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
        optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(sum_loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = multilayer_prediction(tf_valid_dataset, weights, biases)
        test_prediction =  multilayer_prediction(tf_test_dataset,  weights, biases)
    
    
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            # Pick an offset within the training data, which has been randomized.
            # Note: we could use better randomization across epochs.
            if(num_batches != None):
                offset = step % num_batches
            else:
                offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            # Generate a minibatch.
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            # Prepare a dictionary telling the session where to feed the minibatch.
            # The key of the dictionary is the placeholder node of the graph to be fed,
            # and the value is the numpy array to feed to it.
            feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
            _, l, predictions = session.run(
                [optimizer, sum_loss, train_prediction], feed_dict=feed_dict)
            if (step % (num_steps/5) == 0):
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" % accuracy(
                        valid_prediction.eval(), valid_labels))
        print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

    

In [217]:
x = image_size*image_size
y = num_labels
batch_size = 128
l2_weights = 1e-3
hidden_layer_nodes = 1024
layer_number = 2
num_steps = 4000
num_batches = None 
nn_multilayer_function(num_steps,num_batches, x, y, hidden_layer_nodes,layer_number, l2_weights, batch_size,valid_dataset,test_dataset)

Initialized
Minibatch loss at step 0: 665.523376
Minibatch accuracy: 8.6%
Validation accuracy: 28.2%
Minibatch loss at step 800: 140.468048
Minibatch accuracy: 85.9%
Validation accuracy: 81.0%
Minibatch loss at step 1600: 61.988884
Minibatch accuracy: 82.0%
Validation accuracy: 83.7%
Minibatch loss at step 2400: 27.762058
Minibatch accuracy: 89.1%
Validation accuracy: 85.4%
Minibatch loss at step 3200: 12.654583
Minibatch accuracy: 88.3%
Validation accuracy: 86.9%
Test accuracy: 93.6%


In [218]:
x = image_size*image_size
y = num_labels
batch_size = 128
l2_weights = 1e-3
hidden_layer_nodes = 1024
layer_number = 3
num_steps = 4000
num_batches = None 
nn_multilayer_function(num_steps,num_batches, x, y, hidden_layer_nodes,layer_number, l2_weights, batch_size,valid_dataset,test_dataset)

Initialized
Minibatch loss at step 0: 8295.061523
Minibatch accuracy: 7.8%
Validation accuracy: 14.8%
Minibatch loss at step 800: nan
Minibatch accuracy: 11.7%
Validation accuracy: 10.0%
Minibatch loss at step 1600: nan
Minibatch accuracy: 7.8%
Validation accuracy: 10.0%
Minibatch loss at step 2400: nan
Minibatch accuracy: 9.4%
Validation accuracy: 10.0%
Minibatch loss at step 3200: nan
Minibatch accuracy: 11.7%
Validation accuracy: 10.0%
Test accuracy: 10.0%


In [232]:
def multilayer_hidden_nodes_reduce_function(x, y, hidden_layer_nodes,layer_number,dataset):
    weights = [0]*layer_number
    biases =  [0]*layer_number
    l2_loss = 0
    for i in range(layer_number):
        if i == 0:
            num = int(hidden_layer_nodes/(2**i))
            print(num)
            weights[i], biases[i] = paras_train(x, num)
            logits = tf.matmul(dataset, weights[i]) + biases[i]
        elif i == layer_number -1:
            num = int(hidden_layer_nodes/(2**(i-1)))
            print(num)
            weights[i], biases[i] = paras_train(num,y)
            logits = tf.matmul(tf.nn.relu(logits), weights[i]) + biases[i]
        else:
            num_1 = int(hidden_layer_nodes/(2**i))
            num_2 = int(hidden_layer_nodes/(2**(i-1)))
            print(num_1,num_2)
            weights[i], biases[i] = paras_train(num_2,num_1)
            logits = tf.matmul(tf.nn.relu(logits), weights[i]) + biases[i]
        l2_loss += tf.nn.l2_loss(weights[i])
    return weights,biases,logits,l2_loss
    

In [233]:
def nn_multilayer_hidden_nodes_reduce_function(num_steps,num_batches, x, y, hidden_layer_nodes,layer_number, l2_weights, batch_size,valid_dataset,test_dataset): 
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(tf.float32,
                                          shape=(batch_size, x))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, y))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)
        
        # Variables.
        weights,biases,logits,l2_loss = multilayer_hidden_nodes_reduce_function(x, y, hidden_layer_nodes,layer_number,tf_train_dataset)

        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))
        
        sum_loss = loss + l2_loss*l2_weights

        # Optimizer.
        global_step = tf.Variable(0)  # count the number of steps taken.
        learning_rate = tf.train.exponential_decay(0.5, global_step, 100000, 0.96)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
        optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(sum_loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = multilayer_prediction(tf_valid_dataset, weights, biases)
        test_prediction =  multilayer_prediction(tf_test_dataset,  weights, biases)
    
    
    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        print("Initialized")
        for step in range(num_steps):
            # Pick an offset within the training data, which has been randomized.
            # Note: we could use better randomization across epochs.
            if(num_batches != None):
                offset = step % num_batches
            else:
                offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            # Generate a minibatch.
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            # Prepare a dictionary telling the session where to feed the minibatch.
            # The key of the dictionary is the placeholder node of the graph to be fed,
            # and the value is the numpy array to feed to it.
            feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
            _, l, predictions = session.run(
                [optimizer, sum_loss, train_prediction], feed_dict=feed_dict)
            if (step % (num_steps/5) == 0):
                print("Minibatch loss at step %d: %f" % (step, l))
                print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                print("Validation accuracy: %.1f%%" % accuracy(
                        valid_prediction.eval(), valid_labels))
        print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

    

In [237]:
x = image_size*image_size
y = num_labels
batch_size = 128
l2_weights = 1e-3
hidden_layer_nodes = 1024
layer_number = 2
num_steps = 20000
num_batches = None 
nn_multilayer_hidden_nodes_reduce_function(num_steps,num_batches, x, y, hidden_layer_nodes,layer_number, l2_weights, batch_size,valid_dataset,test_dataset)

1024
1024
Initialized
Minibatch loss at step 0: 640.559692
Minibatch accuracy: 7.0%
Validation accuracy: 33.6%
Minibatch loss at step 4000: 5.990063
Minibatch accuracy: 89.1%
Validation accuracy: 87.4%
Minibatch loss at step 8000: 0.797676
Minibatch accuracy: 79.7%
Validation accuracy: 87.6%
Minibatch loss at step 12000: 0.632559
Minibatch accuracy: 86.7%
Validation accuracy: 88.5%
Minibatch loss at step 16000: 0.397515
Minibatch accuracy: 92.2%
Validation accuracy: 88.7%
Test accuracy: 94.5%


In [252]:
x = image_size*image_size
y = num_labels
batch_size = 128
l2_weights = 1e-3
hidden_layer_nodes = 1024
layer_number = 3
num_steps = 20000
num_batches = None 
nn_multilayer_function(num_steps,num_batches, x, y, hidden_layer_nodes,layer_number, l2_weights, batch_size,valid_dataset,test_dataset)

1024
512 1024
512
Initialized
Minibatch loss at step 0: 4639.091797
Minibatch accuracy: 10.2%
Validation accuracy: 10.0%
Minibatch loss at step 4000: nan
Minibatch accuracy: 10.9%
Validation accuracy: 10.0%
Minibatch loss at step 8000: nan
Minibatch accuracy: 7.0%
Validation accuracy: 10.0%
Minibatch loss at step 12000: nan
Minibatch accuracy: 5.5%
Validation accuracy: 10.0%
Minibatch loss at step 16000: nan
Minibatch accuracy: 8.6%
Validation accuracy: 10.0%
Test accuracy: 10.0%


In [246]:
x = image_size*image_size
y = num_labels
batch_size = 128
l2_weights = 1e-3
hidden_layer_nodes = 1024
layer_number = 4
num_steps = 20000
num_batches = None 
nn_multilayer_function(num_steps,num_batches, x, y, hidden_layer_nodes,layer_number, l2_weights, batch_size,valid_dataset,test_dataset)

128
64 128
32 64
32
Initialized
Minibatch loss at step 0: 1737.109985
Minibatch accuracy: 7.0%
Validation accuracy: 10.0%
Minibatch loss at step 400: nan
Minibatch accuracy: 9.4%
Validation accuracy: 10.0%
Minibatch loss at step 800: nan
Minibatch accuracy: 11.7%
Validation accuracy: 10.0%
Minibatch loss at step 1200: nan
Minibatch accuracy: 4.7%
Validation accuracy: 10.0%
Minibatch loss at step 1600: nan
Minibatch accuracy: 7.8%
Validation accuracy: 10.0%
Test accuracy: 10.0%
