## Linear and Logistic Regression - 03  ##
  
  
**Reference**
- TJ Tsai's TensorFlow: 2維空間的線性回歸
- [Deep MNIST for Experts](https://www.tensorflow.org/get_started/mnist/pros)
- [Optimizers](https://www.tensorflow.org/api_guides/python/train#Optimizers)
- [Getting Started With TensorFlow - tf.train API](https://www.tensorflow.org/get_started/get_started#tftrain_api)
- [tf.train.GradientDescentOptimizer](https://www.tensorflow.org/api_docs/python/tf/train/GradientDescentOptimizer)
- [tf.train.AdagradOptimizer](https://www.tensorflow.org/api_docs/python/tf/train/AdagradOptimizer), [PAPER](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf), [Introduction](http://cs.stanford.edu/~ppasupat/a9online/uploads/proximal_notes.pdf)
- [tf.train.RMSPropOptimizer](https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer), [PAPER](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
- [tf.train.AdamOptimizer](https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer), [PAPER](http://arxiv.org/abs/1412.6980)
- [Stanford - CS231n: Convolutional Neural Networks for Visual Recognition - Optimization: Stochastic Gradient Descent](http://cs231n.github.io/optimization-1/)
- [An overview of gradient descent optimization algorithms](https://arxiv.org/abs/1609.04747)


In [7]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


from tensorflow.examples.tutorials.mnist import input_data as mnist_data

import argparse
import sys
import tempfile
import time

import tensorflow as tf


FLAGS = None



def INFO_LOG(info):
    """Print `info` on one line, prefixed with the current local time.

    The line has the form ``[<timestamp>] => <info>``, where the timestamp is
    rendered with the ``%Y-%m-%d %X`` strftime pattern.

    Args:
        info: the message to print; it is interpolated with ``str.format``.
    """
    timestamp = time.strftime("%Y-%m-%d %X", time.localtime())
    line = "[{}] => {}".format(timestamp, info)
    print(line)
    

In [8]:
def deepnn(x):
    """Assemble the convolutional network used to classify MNIST digits.

    Layer sequence: reshape -> 5x5 conv (32 maps) -> 2x2 max-pool ->
    5x5 conv (64 maps) -> 2x2 max-pool -> dense (1024) -> dropout ->
    dense (10).

    Args:
        x: input tensor of shape (N_examples, 784); 784 is the pixel count of
            one flattened 28x28 MNIST image.

    Returns:
        A pair ``(logits, keep_prob)``. ``logits`` has shape (N_examples, 10),
        one unnormalised score per digit class 0-9. ``keep_prob`` is the
        scalar float32 placeholder fed to the dropout layer as the
        probability of keeping a unit.
    """
    # Convolutions need a 4-D input; the trailing 1 is the channel count
    # (grayscale), and -1 lets the batch dimension be inferred.
    with tf.name_scope("reshape"):
        images = tf.reshape(x, [-1, 28, 28, 1])

    # Convolution #1: 1 input channel -> 32 feature maps, 5x5 kernels.
    with tf.name_scope("conv1"):
        conv1_kernel = weight_variable([5, 5, 1, 32])
        conv1_bias = bias_variable([32])
        conv1_pre = conv2d(images, conv1_kernel) + conv1_bias
        conv1_out = tf.nn.relu(conv1_pre)

    # Pooling #1 halves each spatial dimension.
    with tf.name_scope("pool1"):
        pool1_out = max_pool_2x2(conv1_out)

    # Convolution #2: 32 feature maps -> 64 feature maps, 5x5 kernels.
    with tf.name_scope("conv2"):
        conv2_kernel = weight_variable([5, 5, 32, 64])
        conv2_bias = bias_variable([64])
        conv2_pre = conv2d(pool1_out, conv2_kernel) + conv2_bias
        conv2_out = tf.nn.relu(conv2_pre)

    # Pooling #2 halves each spatial dimension again.
    with tf.name_scope("pool2"):
        pool2_out = max_pool_2x2(conv2_out)

    # Dense layer #1: two 2x poolings turn 28x28 into 7x7, with 64 maps each,
    # so every example flattens to 7*7*64 values which are mapped to 1024.
    flat_width = 7 * 7 * 64
    with tf.name_scope("fc1"):
        fc1_weights = weight_variable([flat_width, 1024])
        fc1_bias = bias_variable([1024])

        flattened = tf.reshape(pool2_out, [-1, flat_width])
        fc1_pre = tf.matmul(flattened, fc1_weights) + fc1_bias
        fc1_out = tf.nn.relu(fc1_pre)

    # Dropout regularises the dense features; the caller supplies keep_prob
    # through the placeholder at run time.
    with tf.name_scope("dropout"):
        keep_prob = tf.placeholder(tf.float32)
        fc1_dropped = tf.nn.dropout(fc1_out, keep_prob)

    # Dense layer #2: 1024 features -> 10 class logits.
    with tf.name_scope("fc2"):
        fc2_weights = weight_variable([1024, 10])
        fc2_bias = bias_variable([10])

        logits = tf.matmul(fc1_dropped, fc2_weights) + fc2_bias

    return logits, keep_prob


def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 and SAME padding.

    Args:
        x: input tensor passed to ``tf.nn.conv2d``.
        W: filter tensor passed to ``tf.nn.conv2d``.

    Returns:
        The output tensor of ``tf.nn.conv2d``.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding="SAME")


def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with stride 2 and SAME padding.

    Args:
        x: feature-map tensor passed to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor (a 2X downsampling of the feature map).
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding="SAME")


def weight_variable(shape):
    """Create a weight variable of the given shape.

    The initial value is drawn from a truncated normal distribution with a
    standard deviation of 0.1.

    Args:
        shape: shape of the variable to create.

    Returns:
        A new ``tf.Variable``.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    """Create a bias variable of the given shape, filled with 0.1.

    Args:
        shape: shape of the variable to create.

    Returns:
        A new ``tf.Variable``.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))


In [9]:
def _train_and_evaluate(tag, train_op, accuracy, x, y_, keep_prob, mnist,
                        train_count, batch_size=50, log_every=100):
    """Train with one optimizer in its own session, then log test accuracy.

    Args:
        tag: optimizer label inserted into the log lines (e.g. "Adam").
        train_op: the operation returned by ``Optimizer.minimize``.
        accuracy: scalar accuracy tensor to evaluate.
        x: placeholder for the input images.
        y_: placeholder for the one-hot labels.
        keep_prob: dropout keep-probability placeholder.
        mnist: dataset object providing ``train.next_batch`` and
            ``test.images`` / ``test.labels``.
        train_count: number of training steps to run.
        batch_size: number of examples drawn per training step.
        log_every: training accuracy is logged every this many steps, and
            also on the final step.
    """
    with tf.Session() as session:
        # Variable state lives in the session, so initialising here gives
        # every optimizer its own freshly initialised set of weights.
        session.run(tf.global_variables_initializer())

        for step in range(train_count):
            images, labels = mnist.train.next_batch(batch_size)

            if step % log_every == 0 or step == train_count - 1:
                # Dropout is disabled (keep_prob=1.0) while measuring.
                train_accuracy = session.run(
                    accuracy,
                    feed_dict={x: images, y_: labels, keep_prob: 1.0})
                INFO_LOG("[Train][{}] - step {}, training accuracy {}".format(
                    tag, step, train_accuracy))

            # Dropout keeps half of the units during the training step.
            session.run(
                train_op,
                feed_dict={x: images, y_: labels, keep_prob: 0.5})

        INFO_LOG("")
        test_accuracy = session.run(
            accuracy,
            feed_dict={x: mnist.test.images,
                       y_: mnist.test.labels,
                       keep_prob: 1.0})
        INFO_LOG("[Test][{}] - test accuracy {}".format(tag, test_accuracy))
        INFO_LOG("")


def run():
    """Train the MNIST conv net with three optimizers and log the results.

    Builds one graph (model, loss, accuracy and one training op each for
    Adam, plain gradient descent and RMSProp, all with the same learning
    rate), writes the graph to ``./graph/mnist_deep`` for TensorBoard, and
    then trains and evaluates with each optimizer in turn, each in a separate
    session. MNIST data is read from ``./data/mnist``.
    """
    learning_rate = 1e-4
    train_count = 10000  # alternative considered by the author: 20000

    # Import data
    mnist = mnist_data.read_data_sets("./data/mnist", one_hot=True)

    # Model input: flattened images.
    x = tf.placeholder(tf.float32, [None, 784])

    # Ground-truth one-hot labels.
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Build the graph for the deep net
    y_conv, keep_prob = deepnn(x)

    with tf.name_scope("loss"):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)

    cross_entropy = tf.reduce_mean(cross_entropy)

    # One training op per optimizer; all of them minimise the same loss over
    # the same variables.
    with tf.name_scope("adam_optimizer"):
        adam_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy)

    with tf.name_scope("gradient_descent_optimizer"):
        sgd_step = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cross_entropy)

    with tf.name_scope("rms_prop_optimizer"):
        rmsprop_step = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cross_entropy)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        correct_prediction = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(correct_prediction)

    INFO_LOG("")

    train_writer = tf.summary.FileWriter("./graph/mnist_deep")
    train_writer.add_graph(tf.get_default_graph())
    # Nothing else is written through this writer; close it so the graph
    # event is flushed to disk and the file handle is released.
    train_writer.close()

    INFO_LOG("")

    # The runs are sequential and share the dataset iterator, so each
    # optimizer continues from where the previous one stopped drawing batches.
    for tag, train_op in (("Adam", adam_step),
                          ("GradientDescent", sgd_step),
                          ("RMSProp", rmsprop_step)):
        _train_and_evaluate(tag, train_op, accuracy, x, y_, keep_prob,
                            mnist, train_count)

        

#### Output (Adam)  ####
  
```  
[2017-07-03 13:41:25] => [Train][Adam] - step 0, training accuracy 0.03999999910593033
[2017-07-03 13:41:34] => [Train][Adam] - step 100, training accuracy 0.8600000143051147
[2017-07-03 13:41:44] => [Train][Adam] - step 200, training accuracy 0.800000011920929
[2017-07-03 13:41:54] => [Train][Adam] - step 300, training accuracy 0.9200000166893005
[2017-07-03 13:42:04] => [Train][Adam] - step 400, training accuracy 0.9399999976158142
[2017-07-03 13:42:14] => [Train][Adam] - step 500, training accuracy 0.9599999785423279
[2017-07-03 13:42:23] => [Train][Adam] - step 600, training accuracy 0.9599999785423279
[2017-07-03 13:42:33] => [Train][Adam] - step 700, training accuracy 0.9800000190734863
[2017-07-03 13:42:43] => [Train][Adam] - step 800, training accuracy 0.9599999785423279
[2017-07-03 13:42:53] => [Train][Adam] - step 900, training accuracy 0.9200000166893005
[2017-07-03 13:43:03] => [Train][Adam] - step 1000, training accuracy 0.9800000190734863
[2017-07-03 13:43:13] => [Train][Adam] - step 1100, training accuracy 1.0
[2017-07-03 13:43:23] => [Train][Adam] - step 1200, training accuracy 0.9399999976158142
[2017-07-03 13:43:33] => [Train][Adam] - step 1300, training accuracy 0.9399999976158142
[2017-07-03 13:43:42] => [Train][Adam] - step 1400, training accuracy 0.9200000166893005
[2017-07-03 13:43:52] => [Train][Adam] - step 1500, training accuracy 0.9800000190734863
[2017-07-03 13:44:02] => [Train][Adam] - step 1600, training accuracy 0.9599999785423279
[2017-07-03 13:44:11] => [Train][Adam] - step 1700, training accuracy 0.9599999785423279
[2017-07-03 13:44:21] => [Train][Adam] - step 1800, training accuracy 0.9800000190734863
[2017-07-03 13:44:31] => [Train][Adam] - step 1900, training accuracy 0.9599999785423279
[2017-07-03 13:44:40] => [Train][Adam] - step 2000, training accuracy 1.0
[2017-07-03 13:44:50] => [Train][Adam] - step 2100, training accuracy 0.9800000190734863
[2017-07-03 13:45:00] => [Train][Adam] - step 2200, training accuracy 1.0
[2017-07-03 13:45:10] => [Train][Adam] - step 2300, training accuracy 0.9800000190734863
[2017-07-03 13:45:19] => [Train][Adam] - step 2400, training accuracy 0.9599999785423279
[2017-07-03 13:45:29] => [Train][Adam] - step 2500, training accuracy 1.0
[2017-07-03 13:45:39] => [Train][Adam] - step 2600, training accuracy 1.0
[2017-07-03 13:45:48] => [Train][Adam] - step 2700, training accuracy 0.9399999976158142
[2017-07-03 13:45:58] => [Train][Adam] - step 2800, training accuracy 0.9800000190734863
[2017-07-03 13:46:08] => [Train][Adam] - step 2900, training accuracy 0.9800000190734863
[2017-07-03 13:46:18] => [Train][Adam] - step 3000, training accuracy 1.0
[2017-07-03 13:46:27] => [Train][Adam] - step 3100, training accuracy 0.9599999785423279
[2017-07-03 13:46:37] => [Train][Adam] - step 3200, training accuracy 1.0
[2017-07-03 13:46:47] => [Train][Adam] - step 3300, training accuracy 1.0
[2017-07-03 13:46:57] => [Train][Adam] - step 3400, training accuracy 0.9800000190734863
[2017-07-03 13:47:06] => [Train][Adam] - step 3500, training accuracy 1.0
[2017-07-03 13:47:16] => [Train][Adam] - step 3600, training accuracy 0.9800000190734863
[2017-07-03 13:47:26] => [Train][Adam] - step 3700, training accuracy 0.9800000190734863
[2017-07-03 13:47:36] => [Train][Adam] - step 3800, training accuracy 0.9800000190734863
[2017-07-03 13:47:46] => [Train][Adam] - step 3900, training accuracy 1.0
[2017-07-03 13:47:55] => [Train][Adam] - step 4000, training accuracy 1.0
[2017-07-03 13:48:05] => [Train][Adam] - step 4100, training accuracy 0.9599999785423279
[2017-07-03 13:48:15] => [Train][Adam] - step 4200, training accuracy 1.0
[2017-07-03 13:48:25] => [Train][Adam] - step 4300, training accuracy 0.9800000190734863
[2017-07-03 13:48:34] => [Train][Adam] - step 4400, training accuracy 1.0
[2017-07-03 13:48:44] => [Train][Adam] - step 4500, training accuracy 0.9599999785423279
[2017-07-03 13:48:54] => [Train][Adam] - step 4600, training accuracy 0.9800000190734863
[2017-07-03 13:49:04] => [Train][Adam] - step 4700, training accuracy 0.9800000190734863
[2017-07-03 13:49:13] => [Train][Adam] - step 4800, training accuracy 1.0
[2017-07-03 13:49:23] => [Train][Adam] - step 4900, training accuracy 1.0
[2017-07-03 13:49:33] => [Train][Adam] - step 5000, training accuracy 1.0
[2017-07-03 13:49:43] => [Train][Adam] - step 5100, training accuracy 0.9599999785423279
[2017-07-03 13:49:52] => [Train][Adam] - step 5200, training accuracy 0.9800000190734863
[2017-07-03 13:50:02] => [Train][Adam] - step 5300, training accuracy 0.9800000190734863
[2017-07-03 13:50:12] => [Train][Adam] - step 5400, training accuracy 1.0
[2017-07-03 13:50:22] => [Train][Adam] - step 5500, training accuracy 0.9599999785423279
[2017-07-03 13:50:31] => [Train][Adam] - step 5600, training accuracy 1.0
[2017-07-03 13:50:41] => [Train][Adam] - step 5700, training accuracy 1.0
[2017-07-03 13:50:51] => [Train][Adam] - step 5800, training accuracy 0.9800000190734863
[2017-07-03 13:51:01] => [Train][Adam] - step 5900, training accuracy 0.9800000190734863
[2017-07-03 13:51:11] => [Train][Adam] - step 6000, training accuracy 1.0
[2017-07-03 13:51:21] => [Train][Adam] - step 6100, training accuracy 0.9800000190734863
[2017-07-03 13:51:31] => [Train][Adam] - step 6200, training accuracy 0.9800000190734863
[2017-07-03 13:51:40] => [Train][Adam] - step 6300, training accuracy 1.0
[2017-07-03 13:51:50] => [Train][Adam] - step 6400, training accuracy 1.0
[2017-07-03 13:52:00] => [Train][Adam] - step 6500, training accuracy 1.0
[2017-07-03 13:52:10] => [Train][Adam] - step 6600, training accuracy 1.0
[2017-07-03 13:52:19] => [Train][Adam] - step 6700, training accuracy 1.0
[2017-07-03 13:52:29] => [Train][Adam] - step 6800, training accuracy 1.0
[2017-07-03 13:52:39] => [Train][Adam] - step 6900, training accuracy 1.0
[2017-07-03 13:52:49] => [Train][Adam] - step 7000, training accuracy 1.0
[2017-07-03 13:52:59] => [Train][Adam] - step 7100, training accuracy 1.0
[2017-07-03 13:53:08] => [Train][Adam] - step 7200, training accuracy 1.0
[2017-07-03 13:53:18] => [Train][Adam] - step 7300, training accuracy 1.0
[2017-07-03 13:53:28] => [Train][Adam] - step 7400, training accuracy 0.9599999785423279
[2017-07-03 13:53:38] => [Train][Adam] - step 7500, training accuracy 1.0
[2017-07-03 13:53:48] => [Train][Adam] - step 7600, training accuracy 1.0
[2017-07-03 13:53:57] => [Train][Adam] - step 7700, training accuracy 1.0
[2017-07-03 13:54:07] => [Train][Adam] - step 7800, training accuracy 1.0
[2017-07-03 13:54:17] => [Train][Adam] - step 7900, training accuracy 1.0
[2017-07-03 13:54:27] => [Train][Adam] - step 8000, training accuracy 0.9800000190734863
[2017-07-03 13:54:36] => [Train][Adam] - step 8100, training accuracy 1.0
[2017-07-03 13:54:46] => [Train][Adam] - step 8200, training accuracy 1.0
[2017-07-03 13:54:56] => [Train][Adam] - step 8300, training accuracy 0.9800000190734863
[2017-07-03 13:55:06] => [Train][Adam] - step 8400, training accuracy 1.0
[2017-07-03 13:55:16] => [Train][Adam] - step 8500, training accuracy 1.0
[2017-07-03 13:55:26] => [Train][Adam] - step 8600, training accuracy 1.0
[2017-07-03 13:55:35] => [Train][Adam] - step 8700, training accuracy 0.9800000190734863
[2017-07-03 13:55:45] => [Train][Adam] - step 8800, training accuracy 0.9800000190734863
[2017-07-03 13:55:55] => [Train][Adam] - step 8900, training accuracy 0.9800000190734863
[2017-07-03 13:56:05] => [Train][Adam] - step 9000, training accuracy 1.0
[2017-07-03 13:56:15] => [Train][Adam] - step 9100, training accuracy 1.0
[2017-07-03 13:56:24] => [Train][Adam] - step 9200, training accuracy 1.0
[2017-07-03 13:56:34] => [Train][Adam] - step 9300, training accuracy 0.9800000190734863
[2017-07-03 13:56:44] => [Train][Adam] - step 9400, training accuracy 1.0
[2017-07-03 13:56:54] => [Train][Adam] - step 9500, training accuracy 1.0
[2017-07-03 13:57:04] => [Train][Adam] - step 9600, training accuracy 1.0
[2017-07-03 13:57:14] => [Train][Adam] - step 9700, training accuracy 1.0
[2017-07-03 13:57:23] => [Train][Adam] - step 9800, training accuracy 1.0
[2017-07-03 13:57:33] => [Train][Adam] - step 9900, training accuracy 0.9800000190734863
[2017-07-03 13:57:43] => [Train][Adam] - step 9999, training accuracy 1.0
[2017-07-03 13:57:43] =>
[2017-07-03 14:02:04] => [Test][Adam] - test accuracy 0.9898999929428101
[2017-07-03 14:02:04] =>
```

#### Output (GradientDescent)  ####
  
```
[2017-07-03 14:02:25] => [Train][GradientDescent] - step 0, training accuracy 0.10000000149011612
[2017-07-03 14:02:43] => [Train][GradientDescent] - step 100, training accuracy 0.36000001430511475
[2017-07-03 14:02:59] => [Train][GradientDescent] - step 200, training accuracy 0.36000001430511475
[2017-07-03 14:03:16] => [Train][GradientDescent] - step 300, training accuracy 0.4000000059604645
[2017-07-03 14:03:31] => [Train][GradientDescent] - step 400, training accuracy 0.4000000059604645
[2017-07-03 14:03:46] => [Train][GradientDescent] - step 500, training accuracy 0.5
[2017-07-03 14:04:01] => [Train][GradientDescent] - step 600, training accuracy 0.47999998927116394
[2017-07-03 14:04:16] => [Train][GradientDescent] - step 700, training accuracy 0.3400000035762787
[2017-07-03 14:04:32] => [Train][GradientDescent] - step 800, training accuracy 0.47999998927116394
[2017-07-03 14:04:48] => [Train][GradientDescent] - step 900, training accuracy 0.5
[2017-07-03 14:05:24] => [Train][GradientDescent] - step 1000, training accuracy 0.6000000238418579
[2017-07-03 14:05:34] => [Train][GradientDescent] - step 1100, training accuracy 0.6000000238418579
[2017-07-03 14:05:44] => [Train][GradientDescent] - step 1200, training accuracy 0.5400000214576721
[2017-07-03 14:05:54] => [Train][GradientDescent] - step 1300, training accuracy 0.6600000262260437
[2017-07-03 14:06:03] => [Train][GradientDescent] - step 1400, training accuracy 0.5199999809265137
[2017-07-03 14:06:13] => [Train][GradientDescent] - step 1500, training accuracy 0.5199999809265137
[2017-07-03 14:06:22] => [Train][GradientDescent] - step 1600, training accuracy 0.5799999833106995
[2017-07-03 14:06:32] => [Train][GradientDescent] - step 1700, training accuracy 0.41999998688697815
[2017-07-03 14:06:41] => [Train][GradientDescent] - step 1800, training accuracy 0.5799999833106995
[2017-07-03 14:06:50] => [Train][GradientDescent] - step 1900, training accuracy 0.6600000262260437
[2017-07-03 14:07:00] => [Train][GradientDescent] - step 2000, training accuracy 0.5600000023841858
[2017-07-03 14:07:09] => [Train][GradientDescent] - step 2100, training accuracy 0.7599999904632568
[2017-07-03 14:07:19] => [Train][GradientDescent] - step 2200, training accuracy 0.7200000286102295
[2017-07-03 14:07:28] => [Train][GradientDescent] - step 2300, training accuracy 0.699999988079071
[2017-07-03 14:07:38] => [Train][GradientDescent] - step 2400, training accuracy 0.7200000286102295
[2017-07-03 14:07:47] => [Train][GradientDescent] - step 2500, training accuracy 0.7400000095367432
[2017-07-03 14:07:57] => [Train][GradientDescent] - step 2600, training accuracy 0.7599999904632568
[2017-07-03 14:08:07] => [Train][GradientDescent] - step 2700, training accuracy 0.800000011920929
[2017-07-03 14:08:16] => [Train][GradientDescent] - step 2800, training accuracy 0.699999988079071
[2017-07-03 14:08:26] => [Train][GradientDescent] - step 2900, training accuracy 0.9399999976158142
[2017-07-03 14:08:36] => [Train][GradientDescent] - step 3000, training accuracy 0.8199999928474426
[2017-07-03 14:08:45] => [Train][GradientDescent] - step 3100, training accuracy 0.7200000286102295
[2017-07-03 14:08:55] => [Train][GradientDescent] - step 3200, training accuracy 0.7599999904632568
[2017-07-03 14:09:05] => [Train][GradientDescent] - step 3300, training accuracy 0.800000011920929
[2017-07-03 14:09:14] => [Train][GradientDescent] - step 3400, training accuracy 0.7599999904632568
[2017-07-03 14:09:24] => [Train][GradientDescent] - step 3500, training accuracy 0.7200000286102295
[2017-07-03 14:09:33] => [Train][GradientDescent] - step 3600, training accuracy 0.7599999904632568
[2017-07-03 14:09:43] => [Train][GradientDescent] - step 3700, training accuracy 0.7799999713897705
[2017-07-03 14:09:52] => [Train][GradientDescent] - step 3800, training accuracy 0.7799999713897705
[2017-07-03 14:10:02] => [Train][GradientDescent] - step 3900, training accuracy 0.7400000095367432
[2017-07-03 14:10:11] => [Train][GradientDescent] - step 4000, training accuracy 0.7200000286102295
[2017-07-03 14:10:21] => [Train][GradientDescent] - step 4100, training accuracy 0.8399999737739563
[2017-07-03 14:10:30] => [Train][GradientDescent] - step 4200, training accuracy 0.8799999952316284
[2017-07-03 14:10:40] => [Train][GradientDescent] - step 4300, training accuracy 0.7200000286102295
[2017-07-03 14:10:49] => [Train][GradientDescent] - step 4400, training accuracy 0.7400000095367432
[2017-07-03 14:10:59] => [Train][GradientDescent] - step 4500, training accuracy 0.8999999761581421
[2017-07-03 14:11:08] => [Train][GradientDescent] - step 4600, training accuracy 0.8600000143051147
[2017-07-03 14:11:18] => [Train][GradientDescent] - step 4700, training accuracy 0.7799999713897705
[2017-07-03 14:11:28] => [Train][GradientDescent] - step 4800, training accuracy 0.800000011920929
[2017-07-03 14:11:37] => [Train][GradientDescent] - step 4900, training accuracy 0.7799999713897705
[2017-07-03 14:11:47] => [Train][GradientDescent] - step 5000, training accuracy 0.7799999713897705
[2017-07-03 14:11:57] => [Train][GradientDescent] - step 5100, training accuracy 0.7799999713897705
[2017-07-03 14:12:07] => [Train][GradientDescent] - step 5200, training accuracy 0.9200000166893005
[2017-07-03 14:12:16] => [Train][GradientDescent] - step 5300, training accuracy 0.8199999928474426
[2017-07-03 14:12:26] => [Train][GradientDescent] - step 5400, training accuracy 0.7599999904632568
[2017-07-03 14:12:36] => [Train][GradientDescent] - step 5500, training accuracy 0.8799999952316284
[2017-07-03 14:12:45] => [Train][GradientDescent] - step 5600, training accuracy 0.7799999713897705
[2017-07-03 14:12:55] => [Train][GradientDescent] - step 5700, training accuracy 0.8600000143051147
[2017-07-03 14:13:05] => [Train][GradientDescent] - step 5800, training accuracy 0.800000011920929
[2017-07-03 14:13:15] => [Train][GradientDescent] - step 5900, training accuracy 0.7799999713897705
[2017-07-03 14:13:25] => [Train][GradientDescent] - step 6000, training accuracy 0.8600000143051147
[2017-07-03 14:13:34] => [Train][GradientDescent] - step 6100, training accuracy 0.7799999713897705
[2017-07-03 14:13:44] => [Train][GradientDescent] - step 6200, training accuracy 0.9200000166893005
[2017-07-03 14:13:54] => [Train][GradientDescent] - step 6300, training accuracy 0.8600000143051147
[2017-07-03 14:14:03] => [Train][GradientDescent] - step 6400, training accuracy 0.8799999952316284
[2017-07-03 14:14:13] => [Train][GradientDescent] - step 6500, training accuracy 0.7799999713897705
[2017-07-03 14:14:23] => [Train][GradientDescent] - step 6600, training accuracy 0.8399999737739563
[2017-07-03 14:14:33] => [Train][GradientDescent] - step 6700, training accuracy 0.8799999952316284
[2017-07-03 14:14:43] => [Train][GradientDescent] - step 6800, training accuracy 0.800000011920929
[2017-07-03 14:14:52] => [Train][GradientDescent] - step 6900, training accuracy 0.8600000143051147
[2017-07-03 14:15:02] => [Train][GradientDescent] - step 7000, training accuracy 0.8799999952316284
[2017-07-03 14:15:12] => [Train][GradientDescent] - step 7100, training accuracy 0.8600000143051147
[2017-07-03 14:15:21] => [Train][GradientDescent] - step 7200, training accuracy 0.8199999928474426
[2017-07-03 14:15:31] => [Train][GradientDescent] - step 7300, training accuracy 0.7799999713897705
[2017-07-03 14:15:41] => [Train][GradientDescent] - step 7400, training accuracy 0.8799999952316284
[2017-07-03 14:15:51] => [Train][GradientDescent] - step 7500, training accuracy 0.8199999928474426
[2017-07-03 14:16:01] => [Train][GradientDescent] - step 7600, training accuracy 0.8999999761581421
[2017-07-03 14:16:11] => [Train][GradientDescent] - step 7700, training accuracy 0.8600000143051147
[2017-07-03 14:16:21] => [Train][GradientDescent] - step 7800, training accuracy 0.8600000143051147
[2017-07-03 14:16:31] => [Train][GradientDescent] - step 7900, training accuracy 0.9200000166893005
[2017-07-03 14:16:40] => [Train][GradientDescent] - step 8000, training accuracy 0.8999999761581421
[2017-07-03 14:16:50] => [Train][GradientDescent] - step 8100, training accuracy 0.8399999737739563
[2017-07-03 14:16:59] => [Train][GradientDescent] - step 8200, training accuracy 0.8799999952316284
[2017-07-03 14:17:09] => [Train][GradientDescent] - step 8300, training accuracy 0.9200000166893005
[2017-07-03 14:17:18] => [Train][GradientDescent] - step 8400, training accuracy 0.8799999952316284
[2017-07-03 14:17:28] => [Train][GradientDescent] - step 8500, training accuracy 0.8399999737739563
[2017-07-03 14:17:37] => [Train][GradientDescent] - step 8600, training accuracy 0.9200000166893005
[2017-07-03 14:17:47] => [Train][GradientDescent] - step 8700, training accuracy 0.8600000143051147
[2017-07-03 14:17:56] => [Train][GradientDescent] - step 8800, training accuracy 0.8799999952316284
[2017-07-03 14:18:06] => [Train][GradientDescent] - step 8900, training accuracy 0.800000011920929
[2017-07-03 14:18:15] => [Train][GradientDescent] - step 9000, training accuracy 0.8399999737739563
[2017-07-03 14:18:24] => [Train][GradientDescent] - step 9100, training accuracy 0.8600000143051147
[2017-07-03 14:18:34] => [Train][GradientDescent] - step 9200, training accuracy 0.9399999976158142
[2017-07-03 14:18:44] => [Train][GradientDescent] - step 9300, training accuracy 0.9800000190734863
[2017-07-03 14:18:53] => [Train][GradientDescent] - step 9400, training accuracy 0.9200000166893005
[2017-07-03 14:19:03] => [Train][GradientDescent] - step 9500, training accuracy 0.8199999928474426
[2017-07-03 14:19:12] => [Train][GradientDescent] - step 9600, training accuracy 0.9200000166893005
[2017-07-03 14:19:22] => [Train][GradientDescent] - step 9700, training accuracy 0.9200000166893005
[2017-07-03 14:19:31] => [Train][GradientDescent] - step 9800, training accuracy 0.9200000166893005
[2017-07-03 14:19:40] => [Train][GradientDescent] - step 9900, training accuracy 0.8399999737739563
[2017-07-03 14:19:50] => [Train][GradientDescent] - step 9999, training accuracy 0.8600000143051147
[2017-07-03 14:19:50] =>
[2017-07-03 14:21:06] => [Test][GradientDescent] - test accuracy 0.8906999826431274
[2017-07-03 14:21:08] =>
```

#### Output (RMSProp)  ####
  
```
[2017-07-03 14:42:53] => [Train][RMSProp] - step 0, training accuracy 0.25999999046325684
[2017-07-03 14:43:03] => [Train][RMSProp] - step 100, training accuracy 0.6200000047683716
[2017-07-03 14:43:13] => [Train][RMSProp] - step 200, training accuracy 0.8600000143051147
[2017-07-03 14:43:22] => [Train][RMSProp] - step 300, training accuracy 0.8799999952316284
[2017-07-03 14:43:32] => [Train][RMSProp] - step 400, training accuracy 0.8999999761581421
[2017-07-03 14:43:41] => [Train][RMSProp] - step 500, training accuracy 0.8999999761581421
[2017-07-03 14:43:51] => [Train][RMSProp] - step 600, training accuracy 1.0
[2017-07-03 14:44:01] => [Train][RMSProp] - step 700, training accuracy 1.0
[2017-07-03 14:44:10] => [Train][RMSProp] - step 800, training accuracy 0.9599999785423279
[2017-07-03 14:44:20] => [Train][RMSProp] - step 900, training accuracy 0.9800000190734863
[2017-07-03 14:44:30] => [Train][RMSProp] - step 1000, training accuracy 0.9599999785423279
[2017-07-03 14:44:39] => [Train][RMSProp] - step 1100, training accuracy 0.9200000166893005
[2017-07-03 14:44:49] => [Train][RMSProp] - step 1200, training accuracy 1.0
[2017-07-03 14:44:59] => [Train][RMSProp] - step 1300, training accuracy 1.0
[2017-07-03 14:45:08] => [Train][RMSProp] - step 1400, training accuracy 0.9800000190734863
[2017-07-03 14:45:18] => [Train][RMSProp] - step 1500, training accuracy 1.0
[2017-07-03 14:45:27] => [Train][RMSProp] - step 1600, training accuracy 1.0
[2017-07-03 14:45:37] => [Train][RMSProp] - step 1700, training accuracy 0.9800000190734863
[2017-07-03 14:45:47] => [Train][RMSProp] - step 1800, training accuracy 1.0
[2017-07-03 14:45:57] => [Train][RMSProp] - step 1900, training accuracy 1.0
[2017-07-03 14:46:06] => [Train][RMSProp] - step 2000, training accuracy 1.0
[2017-07-03 14:46:16] => [Train][RMSProp] - step 2100, training accuracy 0.9399999976158142
[2017-07-03 14:46:26] => [Train][RMSProp] - step 2200, training accuracy 0.9599999785423279
[2017-07-03 14:46:35] => [Train][RMSProp] - step 2300, training accuracy 0.9800000190734863
[2017-07-03 14:46:45] => [Train][RMSProp] - step 2400, training accuracy 0.9800000190734863
[2017-07-03 14:46:55] => [Train][RMSProp] - step 2500, training accuracy 0.9599999785423279
[2017-07-03 14:47:05] => [Train][RMSProp] - step 2600, training accuracy 0.9599999785423279
[2017-07-03 14:47:14] => [Train][RMSProp] - step 2700, training accuracy 0.9599999785423279
[2017-07-03 14:47:24] => [Train][RMSProp] - step 2800, training accuracy 0.9599999785423279
[2017-07-03 14:47:34] => [Train][RMSProp] - step 2900, training accuracy 1.0
[2017-07-03 14:47:43] => [Train][RMSProp] - step 3000, training accuracy 0.9800000190734863
[2017-07-03 14:47:53] => [Train][RMSProp] - step 3100, training accuracy 0.9800000190734863
[2017-07-03 14:48:03] => [Train][RMSProp] - step 3200, training accuracy 0.9800000190734863
[2017-07-03 14:48:13] => [Train][RMSProp] - step 3300, training accuracy 1.0
[2017-07-03 14:48:22] => [Train][RMSProp] - step 3400, training accuracy 1.0
[2017-07-03 14:48:32] => [Train][RMSProp] - step 3500, training accuracy 0.9800000190734863
[2017-07-03 14:48:42] => [Train][RMSProp] - step 3600, training accuracy 1.0
[2017-07-03 14:48:51] => [Train][RMSProp] - step 3700, training accuracy 0.9800000190734863
[2017-07-03 14:49:01] => [Train][RMSProp] - step 3800, training accuracy 0.9399999976158142
[2017-07-03 14:49:11] => [Train][RMSProp] - step 3900, training accuracy 0.9800000190734863
[2017-07-03 14:49:21] => [Train][RMSProp] - step 4000, training accuracy 1.0
[2017-07-03 14:49:30] => [Train][RMSProp] - step 4100, training accuracy 0.9599999785423279
[2017-07-03 14:49:40] => [Train][RMSProp] - step 4200, training accuracy 0.9800000190734863
[2017-07-03 14:49:50] => [Train][RMSProp] - step 4300, training accuracy 1.0
[2017-07-03 14:49:59] => [Train][RMSProp] - step 4400, training accuracy 1.0
[2017-07-03 14:50:09] => [Train][RMSProp] - step 4500, training accuracy 1.0
[2017-07-03 14:50:19] => [Train][RMSProp] - step 4600, training accuracy 1.0
[2017-07-03 14:50:29] => [Train][RMSProp] - step 4700, training accuracy 1.0
[2017-07-03 14:50:38] => [Train][RMSProp] - step 4800, training accuracy 1.0
[2017-07-03 14:50:48] => [Train][RMSProp] - step 4900, training accuracy 1.0
[2017-07-03 14:50:58] => [Train][RMSProp] - step 5000, training accuracy 0.9800000190734863
[2017-07-03 14:51:07] => [Train][RMSProp] - step 5100, training accuracy 1.0
[2017-07-03 14:51:17] => [Train][RMSProp] - step 5200, training accuracy 0.9800000190734863
[2017-07-03 14:51:27] => [Train][RMSProp] - step 5300, training accuracy 0.9800000190734863
[2017-07-03 14:51:36] => [Train][RMSProp] - step 5400, training accuracy 0.9399999976158142
[2017-07-03 14:51:46] => [Train][RMSProp] - step 5500, training accuracy 1.0
[2017-07-03 14:51:56] => [Train][RMSProp] - step 5600, training accuracy 0.9800000190734863
[2017-07-03 14:52:05] => [Train][RMSProp] - step 5700, training accuracy 1.0
[2017-07-03 14:52:15] => [Train][RMSProp] - step 5800, training accuracy 1.0
[2017-07-03 14:52:25] => [Train][RMSProp] - step 5900, training accuracy 1.0
[2017-07-03 14:52:34] => [Train][RMSProp] - step 6000, training accuracy 1.0
[2017-07-03 14:52:44] => [Train][RMSProp] - step 6100, training accuracy 1.0
[2017-07-03 14:52:54] => [Train][RMSProp] - step 6200, training accuracy 0.9800000190734863
[2017-07-03 14:53:04] => [Train][RMSProp] - step 6300, training accuracy 1.0
[2017-07-03 14:53:13] => [Train][RMSProp] - step 6400, training accuracy 1.0
[2017-07-03 14:53:23] => [Train][RMSProp] - step 6500, training accuracy 1.0
[2017-07-03 14:53:33] => [Train][RMSProp] - step 6600, training accuracy 1.0
[2017-07-03 14:53:42] => [Train][RMSProp] - step 6700, training accuracy 1.0
[2017-07-03 14:53:52] => [Train][RMSProp] - step 6800, training accuracy 0.9800000190734863
[2017-07-03 14:54:02] => [Train][RMSProp] - step 6900, training accuracy 0.9800000190734863
[2017-07-03 14:54:12] => [Train][RMSProp] - step 7000, training accuracy 0.9800000190734863
[2017-07-03 14:54:22] => [Train][RMSProp] - step 7100, training accuracy 1.0
[2017-07-03 14:54:31] => [Train][RMSProp] - step 7200, training accuracy 0.9800000190734863
[2017-07-03 14:54:41] => [Train][RMSProp] - step 7300, training accuracy 0.9599999785423279
[2017-07-03 14:54:51] => [Train][RMSProp] - step 7400, training accuracy 1.0
[2017-07-03 14:55:00] => [Train][RMSProp] - step 7500, training accuracy 1.0
[2017-07-03 14:55:10] => [Train][RMSProp] - step 7600, training accuracy 1.0
[2017-07-03 14:55:20] => [Train][RMSProp] - step 7700, training accuracy 1.0
[2017-07-03 14:55:29] => [Train][RMSProp] - step 7800, training accuracy 0.9800000190734863
[2017-07-03 14:55:39] => [Train][RMSProp] - step 7900, training accuracy 0.9800000190734863
[2017-07-03 14:55:49] => [Train][RMSProp] - step 8000, training accuracy 1.0
[2017-07-03 14:55:58] => [Train][RMSProp] - step 8100, training accuracy 1.0
[2017-07-03 14:56:08] => [Train][RMSProp] - step 8200, training accuracy 0.9800000190734863
[2017-07-03 14:56:18] => [Train][RMSProp] - step 8300, training accuracy 0.9800000190734863
[2017-07-03 14:56:27] => [Train][RMSProp] - step 8400, training accuracy 1.0
[2017-07-03 14:56:37] => [Train][RMSProp] - step 8500, training accuracy 0.9800000190734863
[2017-07-03 14:56:47] => [Train][RMSProp] - step 8600, training accuracy 1.0
[2017-07-03 14:56:56] => [Train][RMSProp] - step 8700, training accuracy 0.9800000190734863
[2017-07-03 14:57:06] => [Train][RMSProp] - step 8800, training accuracy 1.0
[2017-07-03 14:57:16] => [Train][RMSProp] - step 8900, training accuracy 1.0
[2017-07-03 14:57:25] => [Train][RMSProp] - step 9000, training accuracy 1.0
[2017-07-03 14:57:35] => [Train][RMSProp] - step 9100, training accuracy 1.0
[2017-07-03 14:57:45] => [Train][RMSProp] - step 9200, training accuracy 1.0
[2017-07-03 14:57:54] => [Train][RMSProp] - step 9300, training accuracy 1.0
[2017-07-03 14:58:04] => [Train][RMSProp] - step 9400, training accuracy 0.9599999785423279
[2017-07-03 14:58:14] => [Train][RMSProp] - step 9500, training accuracy 1.0
[2017-07-03 14:58:23] => [Train][RMSProp] - step 9600, training accuracy 1.0
[2017-07-03 14:58:33] => [Train][RMSProp] - step 9700, training accuracy 0.9399999976158142
[2017-07-03 14:58:43] => [Train][RMSProp] - step 9800, training accuracy 1.0
[2017-07-03 14:58:52] => [Train][RMSProp] - step 9900, training accuracy 0.9800000190734863
[2017-07-03 14:59:02] => [Train][RMSProp] - step 9999, training accuracy 1.0
[2017-07-03 14:59:02] =>
[2017-07-03 14:59:39] => [Test][RMSProp] - test accuracy 0.9890999794006348
[2017-07-03 14:59:40] =>

```