In [0]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data

# Seed both NumPy and TensorFlow with one shared value so that repeated
# runs start from the same random state.
RANDOM_SEED = 20160704

np.random.seed(RANDOM_SEED)
tf.set_random_seed(RANDOM_SEED)

In [0]:
# Load MNIST from (or into) a local directory; labels are requested in
# one-hot form, matching the 10-wide label placeholder used for the loss.
mnist_data_dir = "/tmp/data/"
mnist = input_data.read_data_sets(mnist_data_dir, one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [0]:
# Hyperparameter summary for this run, kept as a bare string literal.
# Because it is the last expression in the cell, the notebook echoes its
# repr as the cell output; the text itself is not consumed by any code.
"""
-batch size: 32
-FC node #: 512
-filter #: 64/128/128 
-activation function(hidden cutoff): relu  
-keep_prob: 0.7
-b_conv: 0.1
-activation(FC): relu
-learning rate: 0.0001
"""

'\n-batch size: 32\n-FC node #: 512\n-filter #: 64/128/128 \n-activation function(hidden cutoff): relu  \n-keep_prob: 0.7\n-b_conv: 0.1\n-activation(FC): relu\n-learning rate: 0.0001\n'

In [0]:
# Convolution stage 1: 3x3 convolution -> bias + ReLU -> 2x2 max-pool.
# NOTE(review): a graph-level seed is set earlier in the notebook; in
# TensorFlow 1.x the seeds of random ops presumably depend on the order
# in which ops are created, so reordering these statements may change
# the initial weights -- confirm before restructuring.
num_filters1 = 64  # number of output channels of the first convolution

# Input: a batch of flattened images (784 values each), viewed as
# 28x28 images with a single channel.
x = tf.placeholder(tf.float32, [None, 784])
x_image = tf.reshape(x, [-1,28,28,1])

# Kernel tensor: 3x3 window, 1 input channel, num_filters1 output channels,
# initialised from a truncated normal with stddev 0.1.
W_conv1 = tf.Variable(tf.truncated_normal([3,3,1,num_filters1], # filter size 3x3
                                          stddev=0.1))
# Stride 1 in every dimension with 'SAME' padding.
h_conv1 = tf.nn.conv2d(x_image, W_conv1,
                       strides=[1,1,1,1], padding='SAME')

# One bias per filter, initialised to 0.1, added before the ReLU.
b_conv1 = tf.Variable(tf.constant(0.1, shape=[num_filters1]))
h_conv1_cutoff = tf.nn.relu(h_conv1 + b_conv1)

# 2x2 max-pool with stride 2 ('SAME' padding): 28x28 -> 14x14 feature maps.
h_pool1 = tf.nn.max_pool(h_conv1_cutoff, ksize=[1,2,2,1],
                         strides=[1,2,2,1], padding='SAME')

In [0]:
# Convolution stage 2: 3x3 convolution -> bias + ReLU -> 2x2 max-pool,
# applied to the pooled output of stage 1 (h_pool1).
num_filters2 = 128  # number of output channels of the second convolution

# Kernel tensor: 3x3 window, num_filters1 input channels, num_filters2
# output channels, initialised from a truncated normal with stddev 0.1.
W_conv2 = tf.Variable(
            tf.truncated_normal([3,3,num_filters1,num_filters2], # filter size 3x3
                                stddev=0.1))
# Stride 1 in every dimension with 'SAME' padding.
h_conv2 = tf.nn.conv2d(h_pool1, W_conv2,
                       strides=[1,1,1,1], padding='SAME')

# One bias per filter, initialised to 0.1, added before the ReLU.
b_conv2 = tf.Variable(tf.constant(0.1, shape=[num_filters2]))
h_conv2_cutoff = tf.nn.relu(h_conv2 + b_conv2)

# 2x2 max-pool with stride 2 ('SAME' padding): 14x14 -> 7x7 feature maps.
h_pool2 = tf.nn.max_pool(h_conv2_cutoff, ksize=[1,2,2,1],
                         strides=[1,2,2,1], padding='SAME')

In [0]:
# Convolution stage 3 (additional hidden layer): 3x3 convolution ->
# bias + ReLU -> 2x2 max-pool, applied to the pooled output of stage 2.
num_filters3 = 128  # number of output channels of the third convolution

# Kernel tensor: 3x3 window, num_filters2 input channels, num_filters3
# output channels, initialised from a truncated normal with stddev 0.1.
W_conv3 = tf.Variable(
            tf.truncated_normal([3,3,num_filters2,num_filters3], # filter size 3x3
                                stddev=0.1))
# Stride 1 in every dimension with 'SAME' padding.
h_conv3 = tf.nn.conv2d(h_pool2, W_conv3,
                       strides=[1,1,1,1], padding='SAME')

# One bias per filter, initialised to 0.1, added before the ReLU.
b_conv3 = tf.Variable(tf.constant(0.1, shape=[num_filters3]))
h_conv3_cutoff = tf.nn.relu(h_conv3 + b_conv3)

# 2x2 max-pool with stride 2 ('SAME' padding): 7x7 -> 4x4 feature maps
# (the flatten step that follows this stage uses 4*4*num_filters3).
h_pool3 = tf.nn.max_pool(h_conv3_cutoff, ksize=[1,2,2,1],
                         strides=[1,2,2,1], padding='SAME')

In [0]:
# Classifier head: flatten -> fully connected (ReLU) -> dropout -> softmax.

# Size of the flattened feature vector fed to the fully connected layer:
# 4x4 spatial positions times num_filters3 channels.
num_units1 = 4*4*num_filters3
# Number of nodes in the fully connected layer.
num_units2 = 512

h_pool3_flat = tf.reshape(h_pool3, [-1, num_units1])

# Fully connected layer with ReLU activation.
# NOTE(review): w2 uses truncated_normal's default stddev, unlike the
# convolution kernels, which pass stddev=0.1 -- confirm this is intended.
w2 = tf.Variable(tf.truncated_normal([num_units1, num_units2]))
b2 = tf.Variable(tf.constant(0.1, shape=[num_units2]))
fc_pre_activation = tf.matmul(h_pool3_flat, w2) + b2
hidden2 = tf.nn.relu(fc_pre_activation)

# Dropout; the keep probability is supplied through feed_dict at run time.
keep_prob = tf.placeholder(tf.float32)
hidden3_drop = tf.nn.dropout(hidden2, keep_prob)

# Output layer: 10-way softmax with zero-initialised weights and biases.
w0 = tf.Variable(tf.zeros([num_units2, 10]))
b0 = tf.Variable(tf.zeros([10]))
logits = tf.matmul(hidden3_drop, w0) + b0
p = tf.nn.softmax(logits)

In [0]:
# Training objective and evaluation metric.

# One-hot target labels, 10 classes per example.
t = tf.placeholder(tf.float32, [None, 10])

# Summed cross-entropy; probabilities are clipped to [1e-10, 1.0] so the
# logarithm never receives zero.
p_clipped = tf.clip_by_value(p, 1e-10, 1.0)
loss = -tf.reduce_sum(t * tf.log(p_clipped))

# Adam optimiser with learning rate 0.0001.
optimizer = tf.train.AdamOptimizer(0.0001)
train_step = optimizer.minimize(loss)

# Accuracy: fraction of examples whose arg-max prediction equals the
# arg-max of the one-hot label.
predicted_class = tf.argmax(p, 1)
true_class = tf.argmax(t, 1)
correct_prediction = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [0]:
# Open a session, run the variable initialiser once, and create the
# Saver used to write checkpoints during training.
sess = tf.InteractiveSession()
init_op = tf.global_variables_initializer()
sess.run(init_op)
saver = tf.train.Saver()



In [0]:
# Training loop: run NUM_STEPS mini-batch updates; every EVAL_EVERY steps,
# evaluate on the test set, print the metrics, and write a checkpoint.
NUM_STEPS = 200000   # total number of optimisation steps
BATCH_SIZE = 32      # the more complex the network, the smaller the batch
EVAL_EVERY = 500     # evaluate and checkpoint every this many steps
EVAL_CHUNKS = 4      # the test set is evaluated in this many slices

# Slice boundaries that cover every test example, including when the
# test-set size is not a multiple of EVAL_CHUNKS.
num_test = len(mnist.test.labels)
chunk_bounds = [num_test * k // EVAL_CHUNKS for k in range(EVAL_CHUNKS + 1)]

for i in range(1, NUM_STEPS + 1):
    batch_xs, batch_ts = mnist.train.next_batch(BATCH_SIZE)
    # Training: optimise the parameters with dropout active (keep_prob 0.7).
    sess.run(train_step,
             feed_dict={x: batch_xs, t: batch_ts, keep_prob: 0.7})
    if i % EVAL_EVERY == 0:
        loss_vals, acc_vals, chunk_sizes = [], [], []
        for c in range(EVAL_CHUNKS):
            start, end = chunk_bounds[c], chunk_bounds[c + 1]
            if start == end:
                # Empty slice (fewer test examples than chunks): skip it so
                # an undefined accuracy does not contaminate the average.
                continue
            # Evaluation on unseen data: keep_prob 1.0 disables dropout so
            # every unit is used.
            loss_val, acc_val = sess.run([loss, accuracy],
                feed_dict={x: mnist.test.images[start:end],
                           t: mnist.test.labels[start:end],
                           keep_prob: 1.0})
            loss_vals.append(loss_val)
            acc_vals.append(acc_val)
            chunk_sizes.append(end - start)
        # The loss is a sum over examples, so per-slice losses add up.
        loss_val = np.sum(loss_vals)
        # Accuracy is a per-slice mean, so weight each slice by its size.
        if chunk_sizes:
            acc_val = np.average(acc_vals, weights=chunk_sizes)
        else:
            acc_val = float('nan')
        print ('Step: %d, Loss: %f, Accuracy: %f'
               % (i, loss_val, acc_val))
        saver.save(sess, 'cnn_session', global_step=i)

Step: 500, Loss: 1991.633057, Accuracy: 0.938800
Step: 1000, Loss: 1121.740601, Accuracy: 0.966400
Step: 1500, Loss: 932.853638, Accuracy: 0.971300
Step: 2000, Loss: 785.648743, Accuracy: 0.975600
Step: 2500, Loss: 706.690125, Accuracy: 0.976400
Step: 3000, Loss: 558.167603, Accuracy: 0.980600
Step: 3500, Loss: 529.319336, Accuracy: 0.981700
Step: 4000, Loss: 563.638794, Accuracy: 0.981500
Step: 4500, Loss: 538.173035, Accuracy: 0.981100
Step: 5000, Loss: 610.127686, Accuracy: 0.979200
Step: 5500, Loss: 435.879700, Accuracy: 0.986100
Step: 6000, Loss: 397.253510, Accuracy: 0.986300
Step: 6500, Loss: 353.763672, Accuracy: 0.987200
Step: 7000, Loss: 472.455078, Accuracy: 0.984600
Step: 7500, Loss: 496.576508, Accuracy: 0.984400
Step: 8000, Loss: 418.347137, Accuracy: 0.986500
Step: 8500, Loss: 362.044556, Accuracy: 0.986400
Step: 9000, Loss: 321.900024, Accuracy: 0.988900
Step: 9500, Loss: 353.719299, Accuracy: 0.988400
Step: 10000, Loss: 342.230286, Accuracy: 0.988400
Step: 10500, Loss:

In [0]:
# List the files whose names start with 'cnn_session', the prefix passed
# to saver.save in the training loop.
!ls cnn_session*