Deep Learning
=============

Assignment 3
------------

Previously in `2_fullyconnected.ipynb`, you trained a logistic regression and a neural network model.

The goal of this assignment is to explore regularization techniques.

SOme help from [https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/2_BasicModels/logistic_regression.ipynb]

### Requires
Data sets should be generated by running by this notebook
http://localhost:8888/notebooks/sandbox/ipynotebook/UD730/1_notmnist-dataprep.ipynb


In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
import time



First reload the data we generated in `1_notmnist.ipynb`; and then reformat into a shape that's more adapted to the models we're going to train:
- data as a flat matrix,
- labels as float 1-hot encodings.

In [2]:
image_size = 28
num_labels = 10
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  #print('Training set', train_dataset.shape, train_labels.shape)
  #print('Validation set', valid_dataset.shape, valid_labels.shape)
  #print('Test set', test_dataset.shape, test_labels.shape)

# reshape the image parts 28x28 -> 784
(n,width,height) = train_dataset.shape
train_dataset =  np.reshape(train_dataset,(n,width*height))[0:n]
(n,width,height) = valid_dataset.shape
valid_dataset =  np.reshape(valid_dataset,(n,width*height))[0:n]
(n,width,height) = test_dataset.shape
test_dataset =  np.reshape(test_dataset,(n,width*height))[0:n]

def reformat(dataset, labels):
  dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
  # Map 1 to [0.0, 1.0, 0.0 ...], 2 to [0.0, 0.0, 1.0 ...]
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels

trainTF_dataset, trainTF_labels = reformat(train_dataset, train_labels)
validTF_dataset, validTF_labels = reformat(valid_dataset, valid_labels)
testTF_dataset, testTF_labels = reformat(test_dataset, test_labels)

print('Training set %s, %s'  % ( train_dataset.shape, train_labels.shape))
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)


Training set (200000, 784), (200000,)
Validation set (10000, 784) (10000,)
Test set (10000, 784) (10000,)


In [3]:
# this part at the top truncate the data to make it easier to manipulate

train_subset = 200000
idx = np.random.randint(train_labels.shape[0], size=train_subset)
train_dataset = train_dataset[idx, :]
train_labels = train_labels[idx]

trainTF_dataset, trainTF_labels = reformat(train_dataset, train_labels)
validTF_dataset, validTF_labels = reformat(valid_dataset, valid_labels)
testTF_dataset, testTF_labels = reformat(test_dataset, test_labels)
print('Training set %s, %s'  % ( train_dataset.shape, train_labels.shape))

Training set (200000, 784), (200000,)


In [4]:
# this will be used for scoring normal vectors
def accuracy(predictions, labels):
  return (100.0 * np.sum(predictions == labels) / predictions.shape[0])

# this will be used for scoring one-hot
def accuracyTF(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / labels.shape[0])

# placeholders for our Tensor flow input and output
X = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32) # DROP OUT here 

---
Problem 1
---------

Introduce and tune L2 regularization for both logistic and neural network models. Remember that L2 amounts to adding a penalty on the norm of the weights to the loss. In TensorFlow, you can compute the L2 loss for a tensor `t` using `nn.l2_loss(t)`. The right amount of regularization should improve your validation / test accuracy.

---


### Logistic Network with L2 Regularization

Copy from Lesson 1(part2), and add L2.

In [5]:
def SKLogistic():
    clf = LogisticRegression()
    clf.fit(train_dataset, train_labels)

    train_predicted = clf.predict(train_dataset)
    valid_predicted = clf.predict(valid_dataset)
    test_predicted = clf.predict(test_dataset)
    return (accuracy(train_predicted, train_labels), 
            accuracy(valid_predicted, valid_labels), 
            accuracy(test_predicted, test_labels))

start = time.time()
sk_trn_score, sk_vld_score, sk_tst_score = SKLogistic()
print("Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (sk_trn_score, sk_vld_score, sk_tst_score))
print("Elapsed Time: %.2f" % (time.time() - start))

Training: 83.51%	 Validation: 81.97%	 Test: 88.93%
Elapsed Time: 1233.30


In [6]:
# the TF way - stuff must be one-hoted

def TFLogistic(batch_size=128, training_epochs=50, display_step=100, learning_rate=0.01, reg_factor=0.01):

    W = tf.Variable(tf.truncated_normal([784, 10]), name="weights")
    b = tf.Variable(tf.truncated_normal([10]), name="biases")

    logits = tf.matmul(X,W) + b
    predict = tf.nn.softmax(tf.matmul(X, W) + b)

    # for L2 regularization we add something here 
    #loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
    loss = (tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y)) +
            reg_factor * tf.nn.l2_loss(W))
    
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(training_epochs):
            avg_loss = 0.
            total_batch = int(trainTF_dataset.shape[0]/batch_size)
            for step in range(total_batch):
                offset = (step * batch_size) % (trainTF_labels.shape[0] - batch_size)
                batch_xs = trainTF_dataset[offset:(offset + batch_size), :]
                batch_ys = trainTF_labels[offset:(offset + batch_size)]
                _, l, p = sess.run([optimizer, loss, predict], feed_dict={X: batch_xs, y: batch_ys})
                avg_loss += l / total_batch # add this rounds portion of the loss avg
            if (epoch+1) % display_step == 0:
                print("Epoch: %04d, loss = %.9f" % ( (epoch+1), avg_loss))
            
        trainTF_predicted = sess.run([predict], feed_dict={X: trainTF_dataset})
        validTF_predicted = sess.run([predict], feed_dict={X: validTF_dataset})
        testTF_predicted = sess.run([predict], feed_dict={X: testTF_dataset})

    return (accuracyTF(trainTF_predicted[0], trainTF_labels), 
            accuracyTF(validTF_predicted[0], validTF_labels), 
            accuracyTF(testTF_predicted[0], testTF_labels))


for reg_factor in (0.0, 0.001, 0.005, 0.01):
    start = time.time()
    lg_trn_score, lg_vld_score, lg_tst_score = TFLogistic(reg_factor=reg_factor)
    print("Reg Factor %5.3f: TFLog Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (reg_factor,lg_trn_score, lg_vld_score, lg_tst_score))
    print("Elapsed Time: %.2f" % (time.time() - start))



Reg Factor 0.000: TFLog Training: 77.81%	 Validation: 77.19%	 Test: 84.63%
Elapsed Time: 44.70
Reg Factor 0.001: TFLog Training: 81.48%	 Validation: 80.36%	 Test: 87.78%
Elapsed Time: 44.80
Reg Factor 0.005: TFLog Training: 83.77%	 Validation: 82.93%	 Test: 89.71%
Elapsed Time: 44.78
Reg Factor 0.010: TFLog Training: 83.56%	 Validation: 82.88%	 Test: 89.66%
Elapsed Time: 44.48


### Neural Netowk with L2 Regularization

Brining in some of what I learned here [http://localhost:8888/notebooks/sandbox/ipynotebook/tensorflow/NN-MNISTdigits.ipynb] assuming I am running locally


In [7]:
def TFNet(n_hidden1 = 1024, batch_size=128, training_epochs=50, display_step=100, learning_rate=0.01, reg_factor=0.01):
    n_input = trainTF_dataset.shape[1]
    n_classes = trainTF_labels.shape[1]

    # model weights
    hl1   = {'weights':tf.Variable(tf.random_normal([n_input,n_hidden1])),
             'biases':tf.Variable(tf.random_normal([n_hidden1]))}
    outer = {'weights':tf.Variable(tf.random_normal([n_hidden1,n_classes])),
             'biases':tf.Variable(tf.random_normal([n_classes]))}

    layer1 = tf.nn.sigmoid(tf.add(tf.matmul(X, hl1['weights']), hl1['biases']))
    predict = tf.add(tf.matmul(layer1, outer['weights']), outer['biases'])

    # with L2 regularization
    loss = (tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=predict)) +
            reg_factor*tf.nn.l2_loss(hl1['weights']) +
            reg_factor*tf.nn.l2_loss(outer['weights']))
    
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    n_trainingsize = trainTF_labels.shape[0]
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
    
        for epoch in range(training_epochs):
            avg_loss = 0.
            total_batch = int(trainTF_dataset.shape[0]/batch_size)
            for step in range(total_batch):
                offset = (step * batch_size) % (n_trainingsize - batch_size)
                batch_xs = trainTF_dataset[offset:(offset + batch_size), :]
                batch_ys = trainTF_labels[offset:(offset + batch_size)]
                _, l = sess.run([optimizer, loss], feed_dict={X: batch_xs, y: batch_ys})
                avg_loss += l / total_batch # add this rounds portion of the loss avg
            if (epoch+1) % display_step == 0:
                print("Epoch: %04d, loss = %.9f" % ( (epoch+1), avg_loss))
    
        trainTF_predicted = sess.run([predict], feed_dict={X: trainTF_dataset})
        validTF_predicted = sess.run([predict], feed_dict={X: validTF_dataset})
        testTF_predicted = sess.run([predict], feed_dict={X: testTF_dataset})

    return (accuracyTF(trainTF_predicted[0], trainTF_labels), 
            accuracyTF(validTF_predicted[0], validTF_labels), 
            accuracyTF(testTF_predicted[0], testTF_labels))


for reg_factor in (0.0, 0.001, 0.005, 0.01):
    start = time.time()
    nn_trn_score, nn_vld_score, nn_tst_score = TFNet(reg_factor=reg_factor)
    print("Reg Factor %5.3f: TFNet Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (reg_factor,nn_trn_score, nn_vld_score, nn_tst_score))
    print("Elapsed Time: %.2f" % (time.time() - start))


Reg Factor 0.000: TFNet Training: 89.27%	 Validation: 83.97%	 Test: 90.50%
Elapsed Time: 712.97
Reg Factor 0.001: TFNet Training: 82.88%	 Validation: 82.15%	 Test: 89.27%
Elapsed Time: 713.24
Reg Factor 0.005: TFNet Training: 81.68%	 Validation: 81.33%	 Test: 88.25%
Elapsed Time: 727.47
Reg Factor 0.010: TFNet Training: 79.94%	 Validation: 79.41%	 Test: 86.42%
Elapsed Time: 743.82


In [8]:
#summarize all scores
print("Train Data Size: %d" % train_dataset.shape[0])
print("SKLog Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (lg_trn_score, lg_vld_score, lg_tst_score))
print("TFLog Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (nn_trn_score, nn_vld_score, nn_tst_score))
print("TFNet Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (sk_trn_score, sk_vld_score, sk_tst_score))



Train Data Size: 200000
SKLog Training: 83.56%	 Validation: 82.88%	 Test: 89.66%
TFLog Training: 79.94%	 Validation: 79.41%	 Test: 86.42%
TFNet Training: 83.51%	 Validation: 81.97%	 Test: 88.93%


---
Problem 2
---------
Let's demonstrate an extreme case of overfitting. Restrict your training data to just a few batches. What happens?

---

In [9]:
# set the original ones aside
sTrainTF_dataset = trainTF_dataset
sTrainTF_labels = trainTF_labels


print("batch_size 64")
# better yet take a random selection
for subset_size in (100, 500, 1000, 2000, 3000, 4000, 5000):
    idx = np.random.randint(sTrainTF_labels.shape[0], size=subset_size)
    trainTF_dataset = sTrainTF_dataset[idx, :]
    trainTF_labels = sTrainTF_labels[idx,:]
    start = time.time()
    nn_trn_score, nn_vld_score, nn_tst_score = TFNet(batch_size=128, display_step=1000, reg_factor=0.001)
    print("Size %5d: TFNet Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (subset_size, nn_trn_score, nn_vld_score, nn_tst_score))
    print("Elapsed Time: %.2f" % (time.time() - start))

print("")
print("batch_size 128")
# better yet take a random selection
for subset_size in (100, 500, 1000, 2000, 3000, 4000, 5000):
    idx = np.random.randint(sTrainTF_labels.shape[0], size=subset_size)
    trainTF_dataset = sTrainTF_dataset[idx, :]
    trainTF_labels = sTrainTF_labels[idx,:]
    start = time.time()
    nn_trn_score, nn_vld_score, nn_tst_score = TFNet(batch_size=128, display_step=1000, reg_factor=0.001)
    print("Size %5d: TFNet Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (subset_size, nn_trn_score, nn_vld_score, nn_tst_score))
    print("Elapsed Time: %.2f" % (time.time() - start))

print("")
print("batch size 1024")
for subset_size in (3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000):
    idx = np.random.randint(sTrainTF_labels.shape[0], size=subset_size)
    trainTF_dataset = sTrainTF_dataset[idx, :]
    trainTF_labels = sTrainTF_labels[idx,:]
    start = time.time()
    nn_trn_score, nn_vld_score, nn_tst_score = TFNet(batch_size=1024, display_step=1000, reg_factor=0.001)
    print("Size %5d: TFNet Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (subset_size, nn_trn_score, nn_vld_score, nn_tst_score))
    print("Elapsed Time: %.2f" % (time.time() - start))

    


# restore the old values
trainTF_dataset = sTrainTF_dataset
trainTF_labels = sTrainTF_labels


batch_size 64
Size   100: TFNet Training: 6.00%	 Validation: 7.45%	 Test: 6.91%
Elapsed Time: 0.78
Size   500: TFNet Training: 90.60%	 Validation: 58.48%	 Test: 63.91%
Elapsed Time: 2.23
Size  1000: TFNet Training: 97.50%	 Validation: 75.49%	 Test: 82.49%
Elapsed Time: 4.21
Size  2000: TFNet Training: 93.15%	 Validation: 75.55%	 Test: 82.81%
Elapsed Time: 7.77
Size  3000: TFNet Training: 91.57%	 Validation: 76.42%	 Test: 83.59%
Elapsed Time: 11.56
Size  4000: TFNet Training: 88.70%	 Validation: 74.84%	 Test: 82.63%
Elapsed Time: 15.70
Size  5000: TFNet Training: 90.70%	 Validation: 79.07%	 Test: 85.98%
Elapsed Time: 19.52

batch_size 128
Size   100: TFNet Training: 11.00%	 Validation: 8.15%	 Test: 7.57%
Elapsed Time: 1.21
Size   500: TFNet Training: 89.60%	 Validation: 55.22%	 Test: 60.86%
Elapsed Time: 2.70
Size  1000: TFNet Training: 97.10%	 Validation: 75.48%	 Test: 83.28%
Elapsed Time: 4.45
Size  2000: TFNet Training: 92.10%	 Validation: 74.34%	 Test: 81.99%
Elapsed Time: 8.51
Size

---
Problem 3
---------
Introduce Dropout on the hidden layer of the neural network. Remember: Dropout should only be introduced during training, not evaluation, otherwise your evaluation results would be stochastic as well. TensorFlow provides `nn.dropout()` for that, but you have to make sure it's only inserted during training.

What happens to our extreme overfitting case?

---

In [10]:
#  hmmm... how to introduce dropout... I guess google :-)
# for now it just does relu instead of sigmoid
def TFNetDropout(n_hidden1 = 1024, batch_size=128, training_epochs=50, 
                 display_step=100, learning_rate=0.01, reg_factor=0.01, keep_factor=.5):
    n_input = trainTF_dataset.shape[1]
    n_classes = trainTF_labels.shape[1]
    #keep_prob = tf.placeholder(tf.float32)
    
    # model weights
    hl1   = {'weights':tf.Variable(tf.random_normal([n_input,n_hidden1])),
             'biases':tf.Variable(tf.random_normal([n_hidden1]))}
    outer = {'weights':tf.Variable(tf.random_normal([n_hidden1,n_classes])),
             'biases':tf.Variable(tf.random_normal([n_classes]))}

    layer1 = tf.nn.sigmoid(tf.add(tf.matmul(X, hl1['weights']), hl1['biases']))
    drop_out = tf.nn.dropout(layer1, keep_prob ) # apply drop-out to the hidden layer
    predict = tf.add(tf.matmul(drop_out, outer['weights']), outer['biases'])

    # with L2 regularization
    loss = (tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=predict)) +
            reg_factor*tf.nn.l2_loss(hl1['weights']) +
            reg_factor*tf.nn.l2_loss(outer['weights']))
    
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    n_trainingsize = trainTF_labels.shape[0]
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
    
        for epoch in range(training_epochs):
            avg_loss = 0.
            total_batch = int(trainTF_dataset.shape[0]/batch_size)
            for step in range(total_batch):
                offset = (step * batch_size) % (n_trainingsize - batch_size)
                batch_xs = trainTF_dataset[offset:(offset + batch_size), :]
                batch_ys = trainTF_labels[offset:(offset + batch_size)]
                _, l = sess.run([optimizer, loss], feed_dict={X: batch_xs, y: batch_ys, keep_prob: keep_factor})
                avg_loss += l / total_batch # add this rounds portion of the loss avg
            if (epoch+1) % display_step == 0:
                print("Epoch: %04d, loss = %.9f" % ( (epoch+1), avg_loss))
    
        trainTF_predicted = sess.run([predict], feed_dict={X: trainTF_dataset, keep_prob:1.0})
        validTF_predicted = sess.run([predict], feed_dict={X: validTF_dataset, keep_prob:1.0})
        testTF_predicted = sess.run([predict], feed_dict={X: testTF_dataset, keep_prob:1.0})

    return (accuracyTF(trainTF_predicted[0], trainTF_labels), 
            accuracyTF(validTF_predicted[0], validTF_labels), 
            accuracyTF(testTF_predicted[0], testTF_labels))

    
for reg_factor in (0.0, 0.001):
    start = time.time()
    nn_trn_score, nn_vld_score, nn_tst_score = TFNetDropout(reg_factor=reg_factor)
    print("Reg Factor %5.3f: TFNetDropout Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (reg_factor, nn_trn_score, nn_vld_score, nn_tst_score))
    print("Elapsed Time: %.2f" % (time.time() - start))


Reg Factor 0.000: TFNetDropout Training: 85.63%	 Validation: 83.98%	 Test: 90.91%
Elapsed Time: 796.72
Reg Factor 0.001: TFNetDropout Training: 81.98%	 Validation: 81.30%	 Test: 88.81%
Elapsed Time: 794.94


In [11]:
# run some extreme overfitting;
# set the original ones aside
sTrainTF_dataset = trainTF_dataset
sTrainTF_labels = trainTF_labels

print("batch_size 2000")
idx = np.random.randint(sTrainTF_labels.shape[0], size=2000)
trainTF_dataset = sTrainTF_dataset[idx, :]
trainTF_labels = sTrainTF_labels[idx,:]

# better yet take a random selection
for keep_factor in (.5, .7, .9, 1.):
    start = time.time()
    nn_trn_score, nn_vld_score, nn_tst_score = TFNetDropout(training_epochs=100, reg_factor=0.001, keep_factor=keep_factor)
    print("keep %.2f: TFNetDropout Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (keep_factor, nn_trn_score, nn_vld_score, nn_tst_score))
    print("Elapsed Time: %.2f" % (time.time() - start))
#nn_trn_score, nn_vld_score, nn_tst_score = TFNetDropout(reg_factor=0.001, keep_factor=0.5)
#print("keep %2f: TFNetDropout Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (keep_factor, nn_trn_score, nn_vld_score, nn_tst_score))


# restore the old values
trainTF_dataset = sTrainTF_dataset
trainTF_labels = sTrainTF_labels

batch_size 2000
Epoch: 0100, loss = 2.326414466
keep 0.50: TFNetDropout Training: 94.20%	 Validation: 78.36%	 Test: 85.63%
Elapsed Time: 18.08
Epoch: 0100, loss = 1.642942373
keep 0.70: TFNetDropout Training: 93.90%	 Validation: 77.89%	 Test: 85.45%
Elapsed Time: 17.15
Epoch: 0100, loss = 1.423363598
keep 0.90: TFNetDropout Training: 92.90%	 Validation: 76.00%	 Test: 82.58%
Elapsed Time: 17.10
Epoch: 0100, loss = 1.215106161
keep 1.00: TFNetDropout Training: 91.65%	 Validation: 74.83%	 Test: 82.15%
Elapsed Time: 17.78


---
Problem 4
---------

Try to get the best performance you can using a multi-layer model! The best reported test accuracy using a deep network is [97.1%](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html?showComment=1391023266211#c8758720086795711595).

One avenue you can explore is to add multiple layers.

Another one is to use learning rate decay:

    global_step = tf.Variable(0)  # count the number of steps taken.
    learning_rate = tf.train.exponential_decay(0.5, global_step, ...)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
 
 ---


In [12]:
# assumes train,test,valid dataset/labels as  trainTF_dataset are all defined

def TFNetBest(n_hidden1 = 1024, n_hidden2 = 512, training_epochs=100, batch_size = 128,
                 display_step=200, learning_rate=0.001, reg_factor=0.001, keep_factor=.5):
    n_input = trainTF_dataset.shape[1]
    n_classes = trainTF_labels.shape[1]
    
    # model weights
    hl1   = {'weights':tf.Variable(tf.random_normal([n_input,n_hidden1])),
             'biases':tf.Variable(tf.random_normal([n_hidden1]))}
    hl2   = {'weights':tf.Variable(tf.random_normal([n_hidden1,n_hidden2])),
             'biases':tf.Variable(tf.random_normal([n_hidden2]))}
    outer = {'weights':tf.Variable(tf.random_normal([n_hidden2,n_classes])),
             'biases':tf.Variable(tf.random_normal([n_classes]))} 
    #global_step = tf.Variable(0) # count the number of steps taken
    #learning_rate 
    
    layer1 = tf.nn.sigmoid(tf.add(tf.matmul(X, hl1['weights']), hl1['biases']))
    drop_out = tf.nn.dropout(layer1, keep_prob ) # apply drop-out to the hidden layer
    layer2 = tf.nn.sigmoid(tf.add(tf.matmul(drop_out, hl2['weights']), hl2['biases']))
    predict = tf.add(tf.matmul(layer2, outer['weights']), outer['biases'])

    # with L2 regularization
    loss = (tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=predict)) +
            reg_factor*tf.nn.l2_loss(hl1['weights']) +
            reg_factor*tf.nn.l2_loss(hl2['weights']) +
            reg_factor*tf.nn.l2_loss(outer['weights']))
    
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)


    n_trainingsize = trainTF_labels.shape[0]
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
    
        for epoch in range(training_epochs):
            avg_loss = 0.
            total_batch = int(trainTF_dataset.shape[0]/batch_size)
            for step in range(total_batch):
                offset = (step * batch_size) % (n_trainingsize - batch_size)
                batch_xs = trainTF_dataset[offset:(offset + batch_size), :]
                batch_ys = trainTF_labels[offset:(offset + batch_size)]
                _, l = sess.run([optimizer, loss], feed_dict={X: batch_xs, y: batch_ys, keep_prob: keep_factor})
                avg_loss += l / total_batch # add this rounds portion of the loss avg
            if (epoch+1) % display_step == 0:
                print("Epoch: %04d, loss = %.9f" % ( (epoch+1), avg_loss))
    
        trainTF_predicted = sess.run([predict], feed_dict={X: trainTF_dataset, keep_prob:1.0})
        validTF_predicted = sess.run([predict], feed_dict={X: validTF_dataset, keep_prob:1.0})
        testTF_predicted = sess.run([predict], feed_dict={X: testTF_dataset, keep_prob:1.0})

    return (accuracyTF(trainTF_predicted[0], trainTF_labels), 
            accuracyTF(validTF_predicted[0], validTF_labels), 
            accuracyTF(testTF_predicted[0], testTF_labels))    

In [13]:
#grab a smaller set to test with

#print("batch_size 2000")
#idx = np.random.randint(sTrainTF_labels.shape[0], size=2000)
#trainTF_dataset = sTrainTF_dataset[idx, :]
#trainTF_labels = sTrainTF_labels[idx,:]

# better yet take a random selection
for n_hidden_layer2 in (128, 512, 1024):
    start = time.time()
    nn_trn_score, nn_vld_score, nn_tst_score = TFNetBest(n_hidden2=n_hidden_layer2)
    print("hidden layer2 %4d: TFNet/Best Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (n_hidden_layer2, nn_trn_score, nn_vld_score, nn_tst_score))
    print("Elapsed Time: %.2f" % (time.time() - start))
#nn_trn_score, nn_vld_score, nn_tst_score = TFNetBest(reg_factor=0.001, keep_factor=0.5)
#print("keep %2f: TFNetDropout Training: %.2f%%\t Validation: %.2f%%\t Test: %.2f%%" % (keep_factor, nn_trn_score, nn_vld_score, nn_tst_score))


# restore the old values
#trainTF_dataset = sTrainTF_dataset
#trainTF_labels = sTrainTF_labels

hidden layer2  128: TFNet/Best Training: 85.78%	 Validation: 85.00%	 Test: 91.68%
Elapsed Time: 1777.21
hidden layer2  512: TFNet/Best Training: 85.82%	 Validation: 85.13%	 Test: 91.88%
Elapsed Time: 3046.73
hidden layer2 1024: TFNet/Best Training: 85.88%	 Validation: 85.14%	 Test: 91.81%
Elapsed Time: 4362.33
