Deep Learning
=============

Assignment 4
------------

Previously in `2_fullyconnected.ipynb` and `3_regularization.ipynb`, we trained fully connected networks to classify [notMNIST](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html) characters.

The goal of this assignment is to make the neural network convolutional.

In [1]:
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
from time import time
from tqdm import tqdm_notebook as tqdm
import datetime

  from ._conv import register_converters as _register_converters


In [2]:
pickle_file = 'notMNIST.pickle'

# NOTE: unpickling can execute arbitrary code, so only load a pickle file that
# you created yourself or otherwise trust.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)

# The training split is used in full; only the first 2000 examples of the
# validation and test splits are kept.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset = save['valid_dataset'][:2000]
valid_labels = save['valid_labels'][:2000]
test_dataset = save['test_dataset'][:2000]
test_labels = save['test_labels'][:2000]
del save  # hint to help gc free up memory

print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (2000, 28, 28) (2000,)
Test set (2000, 28, 28) (2000,)


Reformat into a TensorFlow-friendly shape:
- convolutions need the image data formatted as a cube (width by height by #channels)
- labels as float 1-hot encodings.

In [3]:
image_size = 28
num_labels = 10
num_channels = 1  # grayscale

# numpy is already imported in the first cell; repeated here so this cell also
# runs on its own.
import numpy as np


def reformat(dataset, labels):
    """Convert raw images and integer labels into float32 network inputs.

    Args:
        dataset: array reshapeable to
            (-1, image_size, image_size, num_channels).
        labels: 1-D array of integer class ids.

    Returns:
        A tuple (images, one_hot): `images` is float32 with shape
        (N, image_size, image_size, num_channels); `one_hot` is float32 with
        shape (N, num_labels), with a 1.0 in the column matching each label.
    """
    images = dataset.reshape((-1, image_size, image_size, num_channels))
    class_ids = np.arange(num_labels)
    # Broadcasting a (N, 1) column of labels against the (num_labels,) row of
    # class ids yields the (N, num_labels) boolean one-hot mask.
    label_column = labels[:, None]
    one_hot_mask = class_ids == label_column
    return images.astype(np.float32), one_hot_mask.astype(np.float32)


# Replace every split with its reformatted version.
# NOTE: this overwrites the raw arrays, so the cell is not idempotent. Running
# it a second time would feed already one-hot labels back into reformat();
# re-run the loading cell first.
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)

for split_name, split_images, split_labels in (
        ('Training set', train_dataset, train_labels),
        ('Validation set', valid_dataset, valid_labels),
        ('Test set', test_dataset, test_labels)):
    print(split_name, split_images.shape, split_labels.shape)

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (2000, 28, 28, 1) (2000, 10)
Test set (2000, 28, 28, 1) (2000, 10)


In [4]:
def accuracy(predictions, labels):
    """Return the percentage of rows whose arg-max class matches the label.

    Args:
        predictions: 2-D array of per-class scores, one row per example.
        labels: 2-D array of the same shape holding one-hot labels.

    Returns:
        Accuracy as a percentage in the range 0-100.
    """
    predicted_classes = np.argmax(predictions, 1)
    true_classes = np.argmax(labels, 1)
    num_correct = np.sum(predicted_classes == true_classes)
    return 100.0 * num_correct / predictions.shape[0]

Let's build a small network with two convolutional layers, followed by one fully connected layer. Convolutional networks are more expensive computationally, so we'll limit its depth and number of fully connected nodes.

In [10]:
batch_size = 64

# Dedicated graph: every op below is created inside it.
graph = tf.Graph()

with graph.as_default():

    # Input data.
    # Training batches are fed through placeholders with a fixed batch size;
    # the validation and test sets are embedded in the graph as constants.
    tf_train_dataset = tf.placeholder(
        tf.float32,
        shape=(batch_size, image_size, image_size, num_channels),
        name='tf_train_dataset')
    tf_train_labels = tf.placeholder(
        tf.float32, shape=(batch_size, num_labels), name='tf_train_labels')
    tf_valid_dataset = tf.constant(valid_dataset, name='tf_valid_dataset')
    tf_valid_labels = tf.constant(valid_labels, name='tf_valid_labels')
    tf_test_dataset = tf.constant(test_dataset, name='tf_test_dataset')
    tf_test_labels = tf.constant(test_labels, name='tf_test_labels')

    # Variables.
    # conv0: 5x5 kernels, num_channels -> 20 feature maps.
    with tf.name_scope('W0'):
        layer0_weights = tf.Variable(
            tf.truncated_normal([5, 5, num_channels, 20], stddev=0.1))
    layer0_biases = tf.Variable(tf.zeros([20]), name='B0')

    # conv1: 5x5 kernels, 20 -> 40 feature maps.
    with tf.name_scope('W1'):
        layer1_weights = tf.Variable(
            tf.truncated_normal([5, 5, 20, 40], stddev=0.1))
    layer1_biases = tf.Variable(tf.constant(1.0, shape=[40]), name='B1')

    # fc0: flattened 5x5x40 feature maps -> 500 hidden units.
    with tf.name_scope('W2'):
        layer2_weights = tf.Variable(
            tf.truncated_normal([5 * 5 * 40, 500], stddev=0.1))
    layer2_biases = tf.Variable(tf.constant(1.0, shape=[500]), name='B2')

    # fc1: 500 hidden units -> one logit per class.
    with tf.name_scope('W3'):
        layer3_weights = tf.Variable(
            tf.truncated_normal([500, num_labels], stddev=0.1))
    layer3_biases = tf.Variable(
        tf.constant(1.0, shape=[num_labels]), name='B3')

    # Model.
    def model(data):
        """Build the forward pass for `data` and return unnormalised logits.

        Each call adds a new set of ops that share the variables defined
        above. The shape comments assume 28x28 single-channel input.
        """

        with tf.name_scope('conv0'):
            # 5x5 conv, stride 1, SAME padding: [batch, 28, 28, 20]
            conv0 = tf.nn.relu(
                tf.nn.conv2d(
                    data, layer0_weights, [1, 1, 1, 1], padding='SAME') +
                layer0_biases)
            pool0 = tf.nn.max_pool(conv0, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')

        # [batch, 14, 14, 20]

        with tf.name_scope('conv1'):
            conv1 = tf.nn.relu(
                tf.nn.conv2d(
                    pool0, layer1_weights, [1, 1, 1, 1], padding='VALID') +
                layer1_biases)
            # 5x5 VALID conv on 14x14 gives 14 - 5 + 1 = 10:
            # [batch, 10, 10, 40]
            pool1 = tf.nn.max_pool(conv1, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')

        # [batch, 5, 5, 40]

        # get_shape() is the static shape, so the batch dimension of `data`
        # must be known when the graph is built.
        shape = pool1.get_shape().as_list()

        # Flatten to [batch, 5 * 5 * 40] for the fully connected layers.
        reshape = tf.reshape(pool1, [shape[0], shape[1] * shape[2] * shape[3]])

        with tf.name_scope('fc0'):
            hidden0 = tf.nn.relu(
                tf.matmul(reshape, layer2_weights) + layer2_biases)

        with tf.name_scope('fc1'):
            # No activation here: softmax is applied by the loss/prediction ops.
            hidden1 = tf.matmul(hidden0, layer3_weights) + layer3_biases

        return hidden1

    # Training computation.
    logits = model(tf_train_dataset)

    with tf.name_scope('loss'):
        # Mean softmax cross-entropy over the minibatch.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=tf_train_labels, logits=logits)
        )
        # Disabled L2 term, kept for reference. NOTE(review): it names
        # layer4_weights, which this cell never defines.
        # - 0.001*(tf.nn.l2_loss(layer1_weights)+tf.nn.l2_loss(layer2_weights)+tf.nn.l2_loss(layer3_weights)+tf.nn.l2_loss(layer4_weights))

    # The only summary registered in this graph; the training cell collects it
    # through tf.summary.merge_all().
    tf.summary.scalar('loss', loss)

    # Disabled weight/bias histograms.
    #tf.summary.histogram('layer0_weights', layer0_weights)
    #tf.summary.histogram('layer0_biases', layer0_biases)
    #tf.summary.histogram('layer1_weights', layer1_weights)
    #tf.summary.histogram('layer1_biases', layer1_biases)
    #tf.summary.histogram('layer2_weights', layer2_weights)
    #tf.summary.histogram('layer2_biases', layer2_biases)
    #tf.summary.histogram('layer3_weights', layer3_weights)
    #tf.summary.histogram('layer3_biases', layer3_biases)

    # Optimizer.
    # Adam with its default hyper-parameters.
    optimizer = tf.train.AdamOptimizer().minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)

    # The validation and test passes reuse the trained variables through
    # additional model() calls, grouped under one name scope.
    with tf.name_scope('predkk'):
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))

    # Disabled in-graph accuracy metrics; accuracy is computed in NumPy by
    # accuracy() instead.
    #train_acc = tf.metrics.accuracy(labels=tf.argmax(tf_train_labels, 1), predictions=tf.argmax(train_prediction,1))
    #print(train_acc)
    #tf.summary.scalar('train_acc', train_acc[1])

    #test_acc = tf.metrics.accuracy(labels=tf.argmax(tf_test_labels, 1), predictions=tf.argmax(test_prediction,1))
    #valid_acc = tf.metrics.accuracy(labels=tf.argmax(tf_valid_labels, 1), predictions=tf.argmax(valid_prediction,1))

In [11]:
num_steps = 10001
t0 = time()
with tf.Session(
        graph=graph, config=tf.ConfigProto(log_device_placement=True)) as sess:

    # Each launch logs to its own TensorBoard directory, named after the
    # wall-clock start time.
    # NOTE(review): the name holds only the time of day and contains ':',
    # which some filesystems reject -- confirm this suits your platform.
    writer = tf.summary.FileWriter(
        './tf_log/' + datetime.datetime.now().strftime('%H:%M:%S'), sess.graph)
    merged = tf.summary.merge_all()

    # FileWriter buffers events and writes them asynchronously. Closing it in
    # a `finally` flushes the summaries even when training is interrupted
    # (e.g. a KeyboardInterrupt from Kernel -> Interrupt).
    try:
        tf.global_variables_initializer().run()
        #tf.local_variables_initializer().run()

        print('Initialized')
        for step in tqdm(range(num_steps)):

            # Slide a batch_size-wide window over the training set, wrapping
            # around before the window would run past the end.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }

            # One optimisation step; also fetch the loss, the softmax output
            # and the merged summaries for this minibatch.
            _, batch_loss, predictions, summary = sess.run(
                [optimizer, loss, train_prediction, merged],
                feed_dict=feed_dict)

            writer.add_summary(summary, step)

            if step % 2000 == 0:
                print('\nTraining step {}, processed {} entities.'.format(
                    step, step * batch_size))
                elapsed = time() - t0
                # remaining = elapsed * steps_left / steps_done. Multiplying
                # by the float `elapsed` first keeps this a true division
                # under Python 2 as well.
                print('Approx. time remaining: {}'.format(
                    elapsed * (num_steps - step - 1) / (step + 1)))
                print('Minibatch loss: {}.'.format(batch_loss))
                print('Minibatch accuracy: %.1f%%' % accuracy(
                    predictions, batch_labels))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))

        print()
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(),
                                                 test_labels))
        print('Execution time: {}'.format(time() - t0))
        print(step, num_steps)
        print(batch_size)
    finally:
        writer.close()

Initialized



Training step 0, processed 0 entities.
Approx. time remaining: 3689.5060539245605
Minibatch loss: 9.582714080810547.
Minibatch accuracy: 6.2%
Validation accuracy: 9.4%



KeyboardInterrupt: 

---
Problem 2
---------

Try to get the best performance you can using a convolutional net. Look, for example, at the classic [LeNet5](http://yann.lecun.com/exdb/lenet/) architecture, and consider adding Dropout and/or learning rate decay. Another idea to try: Inception-style modules?

---

In [7]:
batch_size = 64

# Dedicated graph for the inception-style model: every op below is created
# inside it.
graph = tf.Graph()

with graph.as_default():

    # Input data.
    # Training batches are fed through placeholders with a fixed batch size;
    # the validation and test sets are embedded in the graph as constants.
    tf_train_dataset = tf.placeholder(
        tf.float32,
        shape=(batch_size, image_size, image_size, num_channels),
        name='tf_train_dataset')
    tf_train_labels = tf.placeholder(
        tf.float32, shape=(batch_size, num_labels), name='tf_train_labels')
    tf_valid_dataset = tf.constant(valid_dataset, name='tf_valid_dataset')
    tf_valid_labels = tf.constant(valid_labels, name='tf_valid_labels')
    tf_test_dataset = tf.constant(test_dataset, name='tf_test_dataset')
    tf_test_labels = tf.constant(test_labels, name='tf_test_labels')

    # Stem: 3x3 conv, 1 -> 10 channels.
    L0_3x3_conv_W = tf.Variable(tf.truncated_normal([3, 3, 1, 10], stddev=0.1))
    L0_3x3_conv_B = tf.Variable(tf.zeros([10]))

    # Branch a: 1x1 conv, 10 -> 5 channels.
    L1a_1x1_conv_W = tf.Variable(
        tf.truncated_normal([1, 1, 10, 5], stddev=0.1))
    L1a_1x1_conv_B = tf.Variable(tf.zeros([5]))

    # Branch b: 1x1 reduction (10 -> 5), then 3x3 conv (5 -> 20).
    L1b_1x1_conv_W = tf.Variable(
        tf.truncated_normal([1, 1, 10, 5], stddev=0.1))
    L1b_1x1_conv_B = tf.Variable(tf.zeros([5]))
    L1b_3x3_conv_W = tf.Variable(
        tf.truncated_normal([3, 3, 5, 20], stddev=0.1))
    L1b_3x3_conv_B = tf.Variable(tf.zeros([20]))

    # Branch c: 1x1 reduction (10 -> 5), then 5x5 conv (5 -> 20).
    L1c_1x1_conv_W = tf.Variable(
        tf.truncated_normal([1, 1, 10, 5], stddev=0.1))
    L1c_1x1_conv_B = tf.Variable(tf.zeros([5]))
    L1c_5x5_conv_W = tf.Variable(
        tf.truncated_normal([5, 5, 5, 20], stddev=0.1))
    L1c_5x5_conv_B = tf.Variable(tf.zeros([20]))

    # Branch d: 3x3 max pool (no weights), then 1x1 conv (10 -> 5).
    L1d_1x1_conv_W = tf.Variable(
        tf.truncated_normal([1, 1, 10, 5], stddev=0.1))
    L1d_1x1_conv_B = tf.Variable(tf.zeros([5]))

    # Fully connected: flattened 14x14x50 feature maps -> 1000 hidden units.
    L2_fc_W = tf.Variable(
        tf.truncated_normal([14 * 14 * 50, 1000], stddev=0.1))
    L2_fc_B = tf.Variable(tf.zeros([1000]))

    # Output layer: 1000 hidden units -> one logit per class.
    L3_fc_W = tf.Variable(tf.truncated_normal([1000, num_labels], stddev=0.1))
    L3_fc_B = tf.Variable(tf.zeros([num_labels]))

    # Model
    def model(data):
        """Build the forward pass for `data` and return unnormalised logits.

        Each call adds a new set of ops that share the variables defined
        above. The shape comments assume 28x28 single-channel input.
        """

        with tf.name_scope('L0'):
            L0 = tf.nn.relu(
                tf.nn.conv2d(
                    data, L0_3x3_conv_W, [1, 2, 2, 1], padding='SAME') +
                L0_3x3_conv_B)

        # Stride 2 with SAME padding halves 28x28: L0 is 14x14x10.
        with tf.name_scope('L1a'):
            # 14x14x5
            L1a = tf.nn.relu(
                tf.nn.conv2d(L0, L1a_1x1_conv_W, [1, 1, 1, 1], padding='SAME')
                + L1a_1x1_conv_B)

        with tf.name_scope('L1b'):
            L1b_reduce = tf.nn.relu(
                tf.nn.conv2d(L0, L1b_1x1_conv_W, [1, 1, 1, 1], padding='SAME')
                + L1b_1x1_conv_B)
            # 14x14x20
            L1b = tf.nn.relu(
                tf.nn.conv2d(
                    L1b_reduce, L1b_3x3_conv_W, [1, 1, 1, 1], padding='SAME') +
                L1b_3x3_conv_B)

        with tf.name_scope('L1c'):
            L1c_reduce = tf.nn.relu(
                tf.nn.conv2d(L0, L1c_1x1_conv_W, [1, 1, 1, 1], padding='SAME')
                + L1c_1x1_conv_B)
            # 14x14x20
            L1c = tf.nn.relu(
                tf.nn.conv2d(
                    L1c_reduce, L1c_5x5_conv_W, [1, 1, 1, 1], padding='SAME') +
                L1c_5x5_conv_B)

        with tf.name_scope('L1d'):
            # Stride-1 SAME pooling keeps the 14x14 spatial size.
            L1d_3x3_pool = tf.layers.max_pooling2d(
                L0, (3, 3), (1, 1), padding='SAME')
            # 14x14x5
            L1d = tf.nn.relu(
                tf.nn.conv2d(
                    L1d_3x3_pool, L1d_1x1_conv_W, [1, 1, 1, 1], padding='SAME')
                + L1d_1x1_conv_B)

        with tf.name_scope('L1'):
            # Channel-wise concat: 5 + 20 + 20 + 5 = 50 channels.
            L1 = tf.concat([L1a, L1b, L1c, L1d], axis=3)  # 14x14x50
            L1_reshaped = tf.reshape(L1, (-1, 14 * 14 * 50))

        with tf.name_scope('L2'):
            L2 = tf.nn.relu(tf.matmul(L1_reshaped, L2_fc_W) + L2_fc_B)
            # [batch, 1000]

        with tf.name_scope('L3'):
            # No activation here: softmax is applied by the loss/prediction ops.
            L3 = tf.matmul(L2, L3_fc_W) + L3_fc_B

        return L3

    # Training computation.
    logits = model(tf_train_dataset)

    with tf.name_scope('loss'):
        # Mean softmax cross-entropy over the minibatch.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=tf_train_labels, logits=logits)
        )
        # Disabled L2 term, kept for reference. NOTE(review): the
        # layerN_weights names are not defined in this cell; the weights here
        # are the L*_W variables.
        # - 0.001*(tf.nn.l2_loss(layer1_weights)+tf.nn.l2_loss(layer2_weights)+tf.nn.l2_loss(layer3_weights)+tf.nn.l2_loss(layer4_weights))

    # The only summary registered in this graph; the training cell collects it
    # through tf.summary.merge_all().
    tf.summary.scalar('loss', loss)

    # Disabled histograms. NOTE(review): these use layerN_* names that this
    # cell does not define.
    #tf.summary.histogram('layer0_weights', layer0_weights)
    #tf.summary.histogram('layer0_biases', layer0_biases)
    #tf.summary.histogram('layer1_weights', layer1_weights)
    #tf.summary.histogram('layer1_biases', layer1_biases)
    #tf.summary.histogram('layer2_weights', layer2_weights)
    #tf.summary.histogram('layer2_biases', layer2_biases)
    #tf.summary.histogram('layer3_weights', layer3_weights)
    #tf.summary.histogram('layer3_biases', layer3_biases)

    # Optimizer.
    # Adam with its default hyper-parameters.
    optimizer = tf.train.AdamOptimizer().minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)

    # The validation and test passes reuse the trained variables through
    # additional model() calls, grouped under one name scope.
    with tf.name_scope('predkk'):
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))

    # In-graph streaming accuracy metrics. tf.metrics.accuracy returns an
    # (accuracy, update_op) pair backed by local variables, so fetching these
    # would first require tf.local_variables_initializer().
    # NOTE(review): the training loop that follows never fetches them; it
    # computes accuracy in NumPy with accuracy().
    train_acc = tf.metrics.accuracy(
        labels=tf.argmax(tf_train_labels, 1),
        predictions=tf.argmax(train_prediction, 1))
    #print(train_acc)
    #tf.summary.scalar('train_acc', train_acc[1])

    test_acc = tf.metrics.accuracy(
        labels=tf.argmax(tf_test_labels, 1),
        predictions=tf.argmax(test_prediction, 1))
    valid_acc = tf.metrics.accuracy(
        labels=tf.argmax(tf_valid_labels, 1),
        predictions=tf.argmax(valid_prediction, 1))

In [9]:
num_steps = 20001
t0 = time()
with tf.Session(
        graph=graph, config=tf.ConfigProto(log_device_placement=True)) as sess:

    # Each launch logs to its own TensorBoard directory, named after the
    # wall-clock start time.
    # NOTE(review): the name holds only the time of day and contains ':',
    # which some filesystems reject -- confirm this suits your platform.
    writer = tf.summary.FileWriter(
        './tf_log/' + datetime.datetime.now().strftime('%H:%M:%S'), sess.graph)
    merged = tf.summary.merge_all()

    # FileWriter buffers events and writes them asynchronously. Closing it in
    # a `finally` flushes the summaries even when training is interrupted
    # (e.g. a KeyboardInterrupt from Kernel -> Interrupt).
    try:
        tf.global_variables_initializer().run()
        #tf.local_variables_initializer().run()

        print('Initialized')
        for step in tqdm(range(num_steps)):

            # Slide a batch_size-wide window over the training set, wrapping
            # around before the window would run past the end.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }

            # One optimisation step; also fetch the loss, the softmax output
            # and the merged summaries for this minibatch.
            _, batch_loss, predictions, summary = sess.run(
                [optimizer, loss, train_prediction, merged],
                feed_dict=feed_dict)

            writer.add_summary(summary, step)

            if step % 2000 == 0:
                print('\nTraining step {}, processed {} entities.'.format(
                    step, step * batch_size))
                elapsed = time() - t0
                # remaining = elapsed * steps_left / steps_done. Multiplying
                # by the float `elapsed` first keeps this a true division
                # under Python 2 as well.
                print('Approx. time remaining: {}'.format(
                    elapsed * (num_steps - step - 1) / (step + 1)))
                print('Minibatch loss: {}.'.format(batch_loss))
                print('Minibatch accuracy: %.1f%%' % accuracy(
                    predictions, batch_labels))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))

        print('\n\nEvaluating final performance..')
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(),
                                                 test_labels))
        print('Execution time: {}'.format(time() - t0))
        print(step, num_steps)
        print(batch_size)
    finally:
        writer.close()

Initialized



Training step 0, processed 0 entities.
Approx. time remaining: 14535.765647888184
Minibatch loss: 2.3434524536132812.
Minibatch accuracy: 6.2%
Validation accuracy: 33.3%

Training step 2000, processed 128000 entities.
Approx. time remaining: 340.3549272974749
Minibatch loss: 0.2577987015247345.
Minibatch accuracy: 92.2%
Validation accuracy: 90.6%

Training step 4000, processed 256000 entities.
Approx. time remaining: 298.9171988008857
Minibatch loss: 0.1727817952632904.
Minibatch accuracy: 95.3%
Validation accuracy: 91.1%

Training step 6000, processed 384000 entities.
Approx. time remaining: 260.7287514350788
Minibatch loss: 0.10226178169250488.
Minibatch accuracy: 96.9%
Validation accuracy: 91.8%

Training step 8000, processed 512000 entities.
Approx. time remaining: 222.9868485143581
Minibatch loss: 0.08383771777153015.
Minibatch accuracy: 98.4%
Validation accuracy: 91.8%

Training step 10000, processed 640000 entities.
Approx. time remaining: 185.56109500782406
Minibatch loss: 0.0

# arch #2

In [13]:
batch_size = 64

# Dedicated graph for the two-branch model ("arch #2"): every op below is
# created inside it.
graph = tf.Graph()

with graph.as_default():

    # Input data.
    # Training batches are fed through placeholders with a fixed batch size;
    # the validation and test sets are embedded in the graph as constants.
    tf_train_dataset = tf.placeholder(
        tf.float32,
        shape=(batch_size, image_size, image_size, num_channels),
        name='tf_train_dataset')
    tf_train_labels = tf.placeholder(
        tf.float32, shape=(batch_size, num_labels), name='tf_train_labels')
    tf_valid_dataset = tf.constant(valid_dataset, name='tf_valid_dataset')
    tf_valid_labels = tf.constant(valid_labels, name='tf_valid_labels')
    tf_test_dataset = tf.constant(test_dataset, name='tf_test_dataset')
    tf_test_labels = tf.constant(test_labels, name='tf_test_labels')

    # Branch a: 3x3 conv (1 -> 10), then 3x3 conv (10 -> 20).
    L0a_3x3_conv_W = tf.Variable(
        tf.truncated_normal([3, 3, 1, 10], stddev=0.1))
    L0a_3x3_conv_B = tf.Variable(tf.zeros([10]))

    L1a_3x3_conv_W = tf.Variable(
        tf.truncated_normal([3, 3, 10, 20], stddev=0.1))
    L1a_3x3_conv_B = tf.Variable(tf.zeros([20]))

    # Branch b: 7x7 conv (1 -> 20), then 3x3 conv (20 -> 30).
    L0b_7x7_conv_W = tf.Variable(
        tf.truncated_normal([7, 7, 1, 20], stddev=0.1))
    L0b_7x7_conv_B = tf.Variable(tf.zeros([20]))

    L1b_3x3_conv_W = tf.Variable(
        tf.truncated_normal([3, 3, 20, 30], stddev=0.1))
    L1b_3x3_conv_B = tf.Variable(tf.zeros([30]))

    # "Enhance" branch: 3x3 conv over the concatenated first-layer outputs
    # (10 + 20 = 30 channels -> 10).
    enh_W = tf.Variable(tf.truncated_normal([3, 3, 30, 10], stddev=0.1))
    enh_B = tf.Variable(tf.zeros([10]))

    # Fully connected: flattened 7x7x60 feature maps -> 400 hidden units.
    L2_fc_W = tf.Variable(tf.truncated_normal([7 * 7 * 60, 400], stddev=0.1))
    L2_fc_B = tf.Variable(tf.zeros([400]))

    # Output layer: 400 hidden units -> one logit per class.
    L3_fc_W = tf.Variable(tf.truncated_normal([400, num_labels], stddev=0.1))
    L3_fc_B = tf.Variable(tf.zeros([num_labels]))

    # Model
    def model(data):
        """Build the forward pass for `data` and return unnormalised logits.

        Each call adds a new set of ops that share the variables defined
        above. The shape comments assume 28x28 single-channel input.
        """

        with tf.name_scope('L0a'):
            # 3x3 conv, stride 1, SAME padding: 28x28x10
            L0a = tf.nn.relu(
                tf.nn.conv2d(
                    data, L0a_3x3_conv_W, [1, 1, 1, 1], padding='SAME') +
                L0a_3x3_conv_B)
        with tf.name_scope('L1a'):
            # 3x3 max pool, stride 2: 14x14x10
            L1a_pool = tf.nn.max_pool(
                L0a, (1, 3, 3, 1), (1, 2, 2, 1), padding='SAME')
            # 3x3 conv, stride 2: 7x7x20
            L1a = tf.nn.relu(
                tf.nn.conv2d(
                    L1a_pool, L1a_3x3_conv_W, [1, 2, 2, 1], padding='SAME') +
                L1a_3x3_conv_B)

        with tf.name_scope('L0b'):
            # 7x7 conv, stride 1, SAME padding: 28x28x20
            L0b = tf.nn.relu(
                tf.nn.conv2d(
                    data, L0b_7x7_conv_W, [1, 1, 1, 1], padding='SAME') +
                L0b_7x7_conv_B)
        # This scope was previously also named 'L1a', so TensorFlow renamed it
        # to 'L1a_1' and the graph view mislabelled branch b.
        with tf.name_scope('L1b'):
            # 3x3 max pool, stride 2: 14x14x20
            L1b_pool = tf.nn.max_pool(
                L0b, (1, 3, 3, 1), (1, 2, 2, 1), padding='SAME')
            # 3x3 conv, stride 2: 7x7x30
            L1b = tf.nn.relu(
                tf.nn.conv2d(
                    L1b_pool, L1b_3x3_conv_W, [1, 2, 2, 1], padding='SAME') +
                L1b_3x3_conv_B)

        with tf.name_scope('enhance'):
            # Concatenate both first-layer outputs: 28x28x(10 + 20)
            enh_concat = tf.concat([L0a, L0b], axis=3)
            # 3x3 max pool, stride 2: 14x14x30
            enh_pool = tf.nn.max_pool(
                enh_concat, (1, 3, 3, 1), (1, 2, 2, 1), padding='SAME')
            # 3x3 conv, stride 2: 7x7x10
            enh_conv = tf.nn.relu(
                tf.nn.conv2d(enh_pool, enh_W, [1, 2, 2, 1], padding='SAME') +
                enh_B)

        with tf.name_scope('L1'):
            # Channel-wise concat: 20 + 30 + 10 = 60 channels at 7x7.
            L1 = tf.concat([L1a, L1b, enh_conv], axis=3)
            L1_reshaped = tf.reshape(L1, (-1, 7 * 7 * 60))

        with tf.name_scope('L2'):
            # [batch, 400]
            L2 = tf.nn.relu(tf.matmul(L1_reshaped, L2_fc_W) + L2_fc_B)

        with tf.name_scope('L3'):
            # No activation here: softmax is applied by the loss/prediction ops.
            L3 = tf.matmul(L2, L3_fc_W) + L3_fc_B

        return L3

    # Training computation.
    logits = model(tf_train_dataset)

    with tf.name_scope('loss'):
        # Mean softmax cross-entropy over the minibatch; no regularisation
        # term is applied.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=tf_train_labels, logits=logits))

    # The only summary registered in this graph; the training cell collects it
    # through tf.summary.merge_all(). Histograms for the variables above could
    # be added here with tf.summary.histogram(name, variable).
    tf.summary.scalar('loss', loss)

    # Optimizer.
    # Adam with its default hyper-parameters.
    optimizer = tf.train.AdamOptimizer().minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)

    # The validation and test passes reuse the trained variables through
    # additional model() calls, grouped under one name scope.
    with tf.name_scope('predkk'):
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        test_prediction = tf.nn.softmax(model(tf_test_dataset))

    # In-graph streaming accuracy metrics. tf.metrics.accuracy returns an
    # (accuracy, update_op) pair backed by local variables, so fetching these
    # would first require tf.local_variables_initializer().
    # NOTE(review): the training loop that follows never fetches them; it
    # computes accuracy in NumPy with accuracy().
    train_acc = tf.metrics.accuracy(
        labels=tf.argmax(tf_train_labels, 1),
        predictions=tf.argmax(train_prediction, 1))

    test_acc = tf.metrics.accuracy(
        labels=tf.argmax(tf_test_labels, 1),
        predictions=tf.argmax(test_prediction, 1))
    valid_acc = tf.metrics.accuracy(
        labels=tf.argmax(tf_valid_labels, 1),
        predictions=tf.argmax(valid_prediction, 1))

In [14]:
num_steps = 40001
t0 = time()
with tf.Session(
        graph=graph, config=tf.ConfigProto(log_device_placement=True)) as sess:

    # Each launch logs to its own TensorBoard directory, named after the
    # wall-clock start time.
    # NOTE(review): the name holds only the time of day and contains ':',
    # which some filesystems reject -- confirm this suits your platform.
    writer = tf.summary.FileWriter(
        './tf_log/' + datetime.datetime.now().strftime('%H:%M:%S'), sess.graph)
    merged = tf.summary.merge_all()

    # FileWriter buffers events and writes them asynchronously. Closing it in
    # a `finally` flushes the summaries even when training is interrupted
    # (e.g. a KeyboardInterrupt from Kernel -> Interrupt).
    try:
        tf.global_variables_initializer().run()
        #tf.local_variables_initializer().run()

        print('Initialized')
        for step in tqdm(range(num_steps)):

            # Slide a batch_size-wide window over the training set, wrapping
            # around before the window would run past the end.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels
            }

            # One optimisation step; also fetch the loss, the softmax output
            # and the merged summaries for this minibatch.
            _, batch_loss, predictions, summary = sess.run(
                [optimizer, loss, train_prediction, merged],
                feed_dict=feed_dict)

            writer.add_summary(summary, step)

            if step % 2000 == 0:
                print('\nTraining step {}, processed {} entities.'.format(
                    step, step * batch_size))
                elapsed = time() - t0
                # remaining = elapsed * steps_left / steps_done. Multiplying
                # by the float `elapsed` first keeps this a true division
                # under Python 2 as well.
                print('Approx. time remaining: {}'.format(
                    elapsed * (num_steps - step - 1) / (step + 1)))
                print('Minibatch loss: {}.'.format(batch_loss))
                print('Minibatch accuracy: %.1f%%' % accuracy(
                    predictions, batch_labels))
                print('Validation accuracy: %.1f%%' % accuracy(
                    valid_prediction.eval(), valid_labels))

        print('\n\nEvaluating final performance..')
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(),
                                                 test_labels))
        print('Execution time: {}'.format(time() - t0))
        print(step, num_steps)
        print(batch_size)
    finally:
        writer.close()

Initialized



Training step 0, processed 0 entities.
Approx. time remaining: 19838.876724243164
Minibatch loss: 2.983151435852051.
Minibatch accuracy: 10.9%
Validation accuracy: 16.6%

Training step 2000, processed 128000 entities.
Approx. time remaining: 426.1772088084681
Minibatch loss: 0.25252076983451843.
Minibatch accuracy: 90.6%
Validation accuracy: 90.5%

Training step 4000, processed 256000 entities.
Approx. time remaining: 398.500124295632
Minibatch loss: 0.20339727401733398.
Minibatch accuracy: 92.2%
Validation accuracy: 90.4%

Training step 6000, processed 384000 entities.
Approx. time remaining: 374.99104275581857
Minibatch loss: 0.12019984424114227.
Minibatch accuracy: 96.9%
Validation accuracy: 91.3%

Training step 8000, processed 512000 entities.
Approx. time remaining: 352.1423778479106
Minibatch loss: 0.1561535894870758.
Minibatch accuracy: 95.3%
Validation accuracy: 91.5%

Training step 10000, processed 640000 entities.
Approx. time remaining: 329.73981645033126
Minibatch loss: 0.