In [None]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time

# Printed once the imports above have completed without raising.
print("Package loaded!")

### MNIST Database

The MNIST dataset of handwritten digits has a training set of 60,000 examples and a test set of 10,000 examples, both derived from data collected by NIST.
The original black-and-white NIST images were size-normalized to fit in a 20x20 pixel box while preserving their aspect ratio. The resulting images contain grey levels as a result of the anti-aliasing technique used by the normalization algorithm. The images were then centered in a 28x28 image by computing the center of mass of the pixels and translating the image so as to position this point at the center of the 28x28 field.

#### File Formats For The MNIST Database

There are 4 files:

- train-images-idx3-ubyte : training set images
- train-labels-idx1-ubyte : training set labels
- t10k-images-idx3-ubyte : test set images
- t10k-labels-idx1-ubyte : test set labels

The training set contains 60000 examples, and the test set 10000 examples.

**Training Set Image File(train-images-idx3-ubyte)**

 [offset]	| [type]			| [value]	| [desc]	
------------|-------------------|-----------|-------------
0000|32 bit integer|0x00000803(2051)|magic number(MSB first)
0004|32 bit integer|60000|number of images
0008|32 bit integer|28|number of rows
0012|32 bit integer|28|number of columns
0016|unsigned byte|??|pixel
0017|unsigned byte|??|pixel
......|||
xxxx|unsigned byte|??|pixel
Pixels are organized row-wise. Pixel values range from 0 to 255: 0 means background (white) and 255 means foreground (black).

**Training Set Label File (train-labels-idx1-ubyte)**

 [offset]	| [type]			| [value]	| [desc]	
------------|-------------------|-----------|-------------
0000|32 bit integer|0x00000801(2049)|magic number
0004|32 bit integer|60000|number of items
0008|unsigned byte|??|label
0009|unsigned byte|??|label
......|||
xxxx|unsigned byte|??|label
The label values are 0 to 9.

In [None]:
# Directory handed to the MNIST reader.
_data_dir = "./mnist/"

# Read the dataset with labels encoded as one-hot rows.
mnist = input_data.read_data_sets(_data_dir, one_hot=True)

# Keep the test split in its own names; it is fed to the graph as a whole
# when accuracy is evaluated.
img_test, label_test = mnist.test.images, mnist.test.labels

### Multivariate classification
#### Softmax : normalized exponential function
$$
p(y=j|x;w, b) = \frac{e^{W^{(j)T}x+b^{(j)}}}{\sum^n_{i=1}e^{W^{(i)T}x+b^{(i)}}}
$$

#### Cross-entropy loss
$$
H(w, b) = E_y[l_{x_i}]= E_y[\log{\frac{1}{p(x_i)}}] \\
\quad\quad\quad\quad\quad\quad\quad=\sum^n_{i=1}y_i\log(\frac{1}{p(x_i)}) \\
\quad\quad\quad\quad\quad\quad\quad= -\sum^n_{i=1}y_i\log(p(x_i))
$$

In [None]:
# Graph inputs. The leading dimension is None so any batch size can be fed.
x = tf.placeholder(tf.float32, shape=[None, 28 * 28])  # 784 pixels per image
y = tf.placeholder(tf.float32, shape=[None, 10])       # one column per digit 0 ~ 9
# Trainable parameters of the linear model, both initialised to zeros.
W = tf.Variable(tf.zeros(shape=[784, 10]))
b = tf.Variable(tf.zeros(shape=[10]))

def h(X, w, b):
    """Return the affine scores ``X @ w + b`` (the logits fed to softmax).

    X: batch of inputs, w: weight matrix, b: bias added to every row of
    the matrix product.
    """
    scores = tf.matmul(X, w)
    scores = scores + b
    return scores

def softmax(x):
    """Turn scores ``x`` into probabilities by delegating to ``tf.nn.softmax``."""
    probabilities = tf.nn.softmax(x)
    return probabilities

def costF(X, Y, w, b):
    """Mean softmax cross-entropy between labels ``Y`` and the model's logits.

    The logits come from ``h(X, w, b)``. The per-example loss is produced by
    TensorFlow's fused softmax/cross-entropy op rather than by spelling out
    ``-sum(Y * log(softmax(logits)))`` by hand, and the result is averaged
    over the batch.
    """
    logits = h(X, w, b)
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits)
    return tf.reduce_mean(per_example_loss)

# Training loss built from the placeholders and the model parameters.
ce = costF(x, y, W, b)

# Adam with a fixed learning rate; running `train` applies one update step.
learning_rate = 0.001
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train = optimizer.minimize(loss=ce)

In [None]:
# PREDICTION
# Boolean per example: True where the most probable class equals the class
# marked in the label row.
pred = tf.equal(tf.argmax(softmax(h(x, W, b)), 1), tf.argmax(y, 1))
# ACCURACY
# Casting the booleans to float and averaging gives the fraction of correct examples.
accr = tf.reduce_mean(tf.cast(pred, "float"))
# INITIALIZER
# tf.initialize_all_variables() is deprecated; tf.global_variables_initializer()
# is its documented replacement and builds the same initialisation op.
init = tf.global_variables_initializer()

In [None]:
# TRAINING CONFIGURATION
n_samples = mnist.train.num_examples
batch_size = 100
total_batch = n_samples // batch_size  # number of whole mini-batches per epoch
training_epochs = 31                   # epochs 0..30, so epoch 30 is also reported
display_step = 10                      # report accuracy every 10th epoch
start = time.time()

# SESSION
with tf.Session() as sess:
    sess.run(init)
    # The test feed never changes, so build it once instead of on every report.
    feeds_test = {x: img_test, y: label_test}
    # MINI-BATCH LEARNING
    for epoch in range(training_epochs):
        avg_cost = 0.
        for i in range(total_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            feed = {x: batch_xs, y: batch_ys}
            # Fetch the update op and the loss in a single run so the batch is
            # not pushed through the graph a second time just to read the loss.
            batch_cost = sess.run([train, ce], feed)[1]
            # `ce` is already the mean over the batch, so the epoch average is
            # the mean over batches: divide by total_batch, not batch_size.
            avg_cost += batch_cost / total_batch

        # DISPLAY
        if epoch % display_step == 0:
            # NOTE: train_acc is measured on the last mini-batch of the epoch
            # only, not on the full training set.
            feeds_train = {x: batch_xs, y: batch_ys}
            train_acc = sess.run(accr, feeds_train)
            test_acc = sess.run(accr, feeds_test)
            print("Epoch: %03d/%03d cost: %.9f train_acc: %.3f test_acc: %.3f"
                  % (epoch, training_epochs - 1, avg_cost, train_acc, test_acc))
print("DONE")
# Elapsed wall-clock seconds; print as a function call so this also runs on Python 3.
print(time.time() - start)