# Multi-layer neural networks in TensorFlow

In [2]:
import input_data
import tensorflow as tf

# Read the MNIST data sets from the 'MNIST_data' directory, requesting
# one-hot encoded labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Graph inputs. The leading None leaves the batch dimension unspecified, so
# any number of examples can be fed at once.
flat_image_shape = [None, 784]     # one row of 784 values per image
one_hot_label_shape = [None, 10]   # one row of 10 values per label
x = tf.placeholder("float", shape=flat_image_shape)
y_ = tf.placeholder("float", shape=one_hot_label_shape)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# Reconstruct the original shape of each input image: every 784-value row of x
# becomes a 28x28 grid with one channel. The -1 lets TensorFlow infer the
# batch dimension from the data that is fed.
image_tensor_shape = [-1, 28, 28, 1]
x_image = tf.reshape(x, image_tensor_shape)

def weight_variable(shape):
    """Create a ``tf.Variable`` of weights with the given shape.

    The initial values are drawn with ``tf.truncated_normal`` using a
    standard deviation of 0.1.

    Args:
        shape: Shape of the variable to create (e.g. a list of ints).

    Returns:
        The newly created ``tf.Variable``.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a ``tf.Variable`` of biases with the given shape.

    Every element starts at the constant value 0.1.

    Args:
        shape: Shape of the variable to create (e.g. a list of ints).

    Returns:
        The newly created ``tf.Variable``.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))

In [9]:
# Convolution: stride of 1 with zero ("SAME") padding. Pooling: max-pooling over 2x2 blocks.
def conv2d(x, W):
    """Apply a 2-D convolution of ``x`` with the filter ``W``.

    Uses a stride of 1 in every dimension and "SAME" padding.

    Args:
        x: Input tensor passed to ``tf.nn.conv2d``.
        W: Filter tensor passed to ``tf.nn.conv2d``.

    Returns:
        The result of ``tf.nn.conv2d``.
    """
    unit_strides = [1, 1, 1, 1]
    padding_mode = "SAME"
    return tf.nn.conv2d(x, W, strides=unit_strides, padding=padding_mode)

def max_pool_2x2(x):
    """Max-pool ``x`` over 2x2 blocks.

    Both the pooling window and the stride are ``[1, 2, 2, 1]``, with "SAME"
    padding.

    Args:
        x: Input tensor passed to ``tf.nn.max_pool``.

    Returns:
        The result of ``tf.nn.max_pool``.
    """
    pool_window = [1, 2, 2, 1]
    pool_strides = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=pool_window, strides=pool_strides, padding="SAME")

# --- First convolutional layer, followed by a pooling layer ---
# 32 filters with a 5x5 window. The weight tensor has shape [5, 5, 1, 32]:
# the first two dimensions are the window size, the third is the number of
# input channels (1 here), and the last is the number of features to compute.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

# Convolve, add the bias, then apply ReLU (Rectified Linear Unit) and pool.
conv1_preact = conv2d(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_preact)
h_pool1 = max_pool_2x2(h_conv1)

# --- Second convolutional layer ---
# 64 filters with a 5x5 window, reading the 32 features of the first layer.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

conv2_preact = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_preact)
h_pool2 = max_pool_2x2(h_conv2)

# --- Fully connected layer with 1024 neurons ---
# The stride-1 "SAME" convolutions keep the spatial size and each 2x2 pooling
# halves it (28 -> 14 -> 7), so h_pool2 holds 7 * 7 * 64 values per image.
flat_size = 7 * 7 * 64
W_fc1 = weight_variable([flat_size, 1024])
b_fc1 = bias_variable([1024])

# Flatten the pooled feature maps into one vector per image so they can be
# multiplied by the weight matrix, then apply the ReLU activation.
h_pool2_flat = tf.reshape(h_pool2, [-1, flat_size])

fc1_preact = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_preact)

In [10]:
# Dropout reduces the number of effective parameters in the network.
# keep_prob is a placeholder so its value is supplied through feed_dict
# on each run.
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Softmax layer: maps the 1024 fully connected outputs to 10 class scores.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

readout_logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = tf.nn.softmax(readout_logits)

## Training and Evaluation of the model

In [None]:
# The gradient-descent optimizer is replaced with the ADAM optimizer, because
# that algorithm has advantages here.
# keep_prob is added to every feed_dict: it sets the keep probability of the
# dropout layer (0.5 while training, 1.0 while measuring accuracy).

# Cross-entropy summed over the batch. The softmax output is clipped away from
# zero before taking the log: a saturated prediction would otherwise produce
# log(0) = -inf, and 0 * -inf = NaN would then corrupt the loss and gradients.
clipped_probs = tf.clip_by_value(y_conv, 1e-10, 1.0)
cross_entropy = -tf.reduce_sum(y_ * tf.log(clipped_probs))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# A prediction is correct when the highest-scoring class matches the label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
# tf.cast converts the boolean comparison results to floats, so their mean is
# the fraction of correct predictions.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

sess = tf.Session()

sess.run(tf.initialize_all_variables())
for i in range(20000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        # Report accuracy on the current batch with dropout disabled.
        train_accuracy = sess.run(
            accuracy, feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    sess.run(train_step, feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# The attribute is `labels`; the previous spelling `lables` raised an
# AttributeError only after the whole training loop had finished.
test_accuracy = sess.run(
    accuracy,
    feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})
print("test accuracy %g" % test_accuracy)

step 0, training accuracy 0.08
step 100, training accuracy 0.9
step 200, training accuracy 0.94
step 300, training accuracy 0.92
step 400, training accuracy 0.92
step 500, training accuracy 0.88
step 600, training accuracy 0.9
step 700, training accuracy 0.92
step 800, training accuracy 0.94
step 900, training accuracy 0.98
step 1000, training accuracy 0.96
step 1100, training accuracy 1
step 1200, training accuracy 0.98
step 1300, training accuracy 0.98
step 1400, training accuracy 0.94
step 1500, training accuracy 0.96
step 1600, training accuracy 0.94
step 1700, training accuracy 0.98
step 1800, training accuracy 0.96
step 1900, training accuracy 0.98
step 2000, training accuracy 1
step 2100, training accuracy 1
step 2200, training accuracy 0.98
step 2300, training accuracy 0.98
step 2400, training accuracy 0.96
step 2500, training accuracy 0.98
step 2600, training accuracy 1
step 2700, training accuracy 0.96
step 2800, training accuracy 1
step 2900, training accuracy 0.96
step 3000