In [0]:
import tensorflow as tf
import numpy as np
from sklearn.utils import shuffle

In [0]:
from tensorflow.examples.tutorials.mnist import input_data

# Read the four MNIST archives from the local data directory; labels come back
# one-hot encoded because of one_hot=True.
MNIST_DIR = "MNIST_data/"
mnist = input_data.read_data_sets(MNIST_DIR, one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [0]:
# A CNN consumes images as (height, width, channels) rather than as flat
# 28*28 vectors, so every image becomes (28, 28, 1); the trailing 1 is the
# number of channels (C).
# A single vectorized reshape replaces the per-image Python loop; -1 lets
# NumPy infer the number of images.
train_xdata = mnist.train.images.reshape(-1, 28, 28, 1)
test_xdata = mnist.test.images.reshape(-1, 28, 28, 1)

train_labels = mnist.train.labels
test_labels = mnist.test.labels

In [0]:
# Sanity check: display the shapes of the reshaped images and of the label arrays.
train_xdata.shape, train_labels.shape, test_xdata.shape, test_labels.shape

((55000, 28, 28, 1), (55000, 10), (10000, 28, 28, 1), (10000, 10))

In [0]:
# Input images, shape [N, H, W, C]. N is left as None so that batches of any
# size can be fed.
I_D = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1])
# One-hot labels, shape [N, number of classes].
Y = tf.placeholder(dtype=tf.float32, shape=[None, 10])

# Convolution filters, shape [filter_height, filter_width, in_channels, n_filters].
# Layer 1: 4x4 filters, 1 input channel, 32 filters.
C_W1 = tf.Variable(tf.truncated_normal(shape=[4, 4, 1, 32], stddev=0.01))
# Layer 2: 4x4 filters; its in_channels (32) equals layer 1's n_filters; 64 filters.
C_W2 = tf.Variable(tf.truncated_normal(shape=[4, 4, 32, 64], stddev=0.01))

In [0]:
def _conv_relu_pool(inputs, filters):
    """Build one convolution stage: conv -> bias + ReLU -> max pooling.

    The convolution uses stride 1 with SAME padding. A zero-initialised bias
    (one value per output channel, broadcast over batch and spatial axes) is
    added before the ReLU. Pooling uses a 4x4 window (ksize) with stride 2
    and SAME padding.

    Returns the tuple (conv, bias, relu, pooled) so that every intermediate
    tensor remains available to the caller.
    """
    n_filters = filters.shape.as_list()[-1]
    conv = tf.nn.conv2d(inputs, filters, strides=[1, 1, 1, 1], padding="SAME")
    bias = tf.Variable(tf.zeros([1, 1, 1, n_filters]))
    relu = tf.nn.relu(conv + bias)
    pooled = tf.nn.max_pool(relu, ksize=[1, 4, 4, 1], strides=[1, 2, 2, 1], padding="SAME")
    return conv, bias, relu, pooled


### Convolution Layer1
conv1, bias1, relu1, max_pool1 = _conv_relu_pool(I_D, C_W1)

### Convolution Layer2
conv2, bias2, relu2, max_pool2 = _conv_relu_pool(max_pool1, C_W2)

In [0]:
# Display the static shape of the second pooling layer's output.
max_pool2.shape

TensorShape([Dimension(None), Dimension(7), Dimension(7), Dimension(64)])

In [0]:
# Flatten the pooled feature maps into one vector per image so they can be fed
# to the affine (fully connected) layers.
FCS = max_pool2.shape                                    # final conv shape
# Number of features per image: the product of every dimension except the batch one.
final_shape = int(np.prod(FCS.as_list()[1:]))
# -1 plays the role of None here: the batch dimension is inferred at run time.
flat_output = tf.reshape(max_pool2, [-1, final_shape])

In [0]:
# Display the static shape of the flattened tensor.
flat_output.shape

TensorShape([Dimension(None), Dimension(3136)])

In [0]:
n_hidden = 128                 # number of nodes in affine layer 1
n_classes = 10                 # number of output classes

# The 2.0 literals below force floating-point division, so the standard
# deviations are not truncated to 0 under Python 2 integer division.

### Affine layer1 (ReLU) -- He initialisation: stddev = sqrt(2 / fan_in)
W1 = tf.Variable(tf.truncated_normal([final_shape, n_hidden],
                                     stddev=tf.sqrt(2.0 / final_shape)))
b1 = tf.Variable(tf.zeros([1, n_hidden]))
Z1 = tf.nn.relu(tf.matmul(flat_output, W1) + b1)

### Affine layer2 (softmax) -- Xavier initialisation: stddev = sqrt(2 / (fan_in + fan_out))
W2 = tf.Variable(tf.truncated_normal([n_hidden, n_classes],
                                     stddev=tf.sqrt(2.0 / (n_hidden + n_classes))))
b2 = tf.Variable(tf.zeros([1, n_classes]))
logits = tf.matmul(Z1, W2) + b2        # unnormalised class scores
Z2 = tf.nn.softmax(logits)             # => final output: class probabilities

In [0]:
# Mean categorical cross-entropy between the one-hot labels Y and the softmax
# output Z2.
# Softmax probabilities can underflow to exactly 0 in float32; log(0) is -inf
# and 0 * -inf is NaN, which would poison the loss and every later update.
# Clipping keeps the argument of log() strictly positive.
clipped_probs = tf.clip_by_value(Z2, 1e-10, 1.0)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(clipped_probs), axis=1))

# Adadelta (adaptive delta) optimizer; a learning rate of 1.0 is the usual choice.
train_step = tf.train.AdadeltaOptimizer(learning_rate=1.0).minimize(cross_entropy)

In [0]:
# A prediction is correct when the most probable class matches the class
# marked in the one-hot label.
predicted_class = tf.argmax(Z2, axis=1)
true_class = tf.argmax(Y, axis=1)
correct_prediction = tf.equal(predicted_class, true_class)

# Accuracy is the fraction of correct predictions in the fed batch.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [0]:
batch_size = 128
# Derive the batch count from the actual training-set size instead of a
# hard-coded 55000. Floor division means a trailing partial batch is not
# counted.
n_batches = train_xdata.shape[0] // batch_size

In [0]:
# Open a session, then run the initializer op so that every tf.Variable
# defined above receives its initial value.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [0]:
N_EPOCHS = 18

for epoch in range(N_EPOCHS):

    # Monitor progress once per epoch. The accuracy is measured on the test
    # set BEFORE this epoch's updates, so the value printed for epoch 0 is
    # that of the freshly initialised network.
    my_accuracy = sess.run(accuracy, feed_dict={I_D: test_xdata, Y: test_labels})
    print(epoch, my_accuracy)

    # Reshuffle images and labels together so each epoch sees different batches.
    X_, Y_ = shuffle(train_xdata, train_labels)

    # One optimizer step per full mini-batch.
    for start in range(0, n_batches * batch_size, batch_size):
        batch = slice(start, start + batch_size)
        sess.run(train_step, feed_dict={I_D: X_[batch], Y: Y_[batch]})

0 0.0974
1 0.9817
2 0.9882
3 0.9898
4 0.9891
5 0.9914
6 0.9934
7 0.993
8 0.9935
9 0.9926
10 0.9918
11 0.9926
12 0.9934
13 0.9939
14 0.9929
15 0.9938
16 0.9948
17 0.9933


In [0]:
sess.run(accuracy, feed_dict = { I_D: test_xdata, Y: test_labels })          # final accuracy on the test set

0.9941