In [13]:
import tensorflow as tf

## MNIST 데이터 로드하기

In [14]:
from tensorflow.examples.tutorials.mnist import input_data

In [15]:
# The raw labels are integers; one_hot=True loads them as one-hot vectors,
# which is the form used for training.
mnist_data = input_data.read_data_sets(
    'MNIST_data',
    one_hot=True,
)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


## 퍼셉트론 구축하기

### 1. 입력 데이터와 타깃을 위한 플레이스홀더 정의하기

In [16]:
input_size = 784      # number of values in one flattened input row
no_classes = 10       # width of a one-hot label vector
batch_size = 100      # examples requested per training step
total_batches = 200   # number of training steps

# Input batch of shape [batch size, input_size]; None leaves the batch
# dimension unspecified.
x_input = tf.placeholder(
    dtype=tf.float32,
    shape=[None, input_size],
)
# One-hot target batch of shape [batch size, no_classes].
y_input = tf.placeholder(
    dtype=tf.float32,
    shape=[None, no_classes],
)

### 2. Fully-connected Layer를 위한 변수 정의하기

In [17]:
# Weight matrix of shape [input_size, no_classes]: one weight for every
# (pixel, class) pair, initialised from a normal distribution.
initial_weights = tf.random_normal([input_size, no_classes])
weights = tf.Variable(initial_weights)
# Bias vector of shape [no_classes]: one value per class.
initial_bias = tf.random_normal([no_classes])
bias = tf.Variable(initial_bias)

In [18]:
# Linear layer: weight every input value, then add the per-class bias.
# The matrix product has shape [batch size, no_classes], i.e. one raw
# score per class for each example in the batch.
weighted_inputs = tf.matmul(x_input, weights)
logits = weighted_inputs + bias

In [19]:
# Compare logits against y_input with softmax cross entropy.
softmax_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    logits=logits,
    labels=y_input,
)
# The loss is the mean of the cross-entropy values.
loss_operation = tf.reduce_mean(softmax_cross_entropy)
# Running this op trains weights and bias so that loss_operation is
# minimized.
gradient_descent = tf.train.GradientDescentOptimizer(learning_rate=0.5)
optimizer = gradient_descent.minimize(loss_operation)

### 3. 데이터로 모델 훈련하기

In [20]:
# Open a session and run the initializer for the global variables.
session = tf.Session()
init_operation = tf.global_variables_initializer()
session.run(init_operation)

In [21]:
# Run total_batches optimization steps, one mini-batch per step.
for batch_no in range(total_batches):
    mnist_batch = mnist_data.train.next_batch(batch_size)
    train_feed = {
        x_input: mnist_batch[0],  # input data
        y_input: mnist_batch[1],  # target labels
    }
    # Fetching optimizer applies the update; loss_operation is fetched in
    # the same run so the loss can be printed.
    _, loss_value = session.run([optimizer, loss_operation],
                                feed_dict=train_feed)
    print(loss_value)

12.327893
9.8321905
9.286905
7.8663096
7.1486235
6.434574
7.615304
6.2233124
5.891106
5.6127553
6.0298896
4.6900806
5.6410866
4.190892
4.7605786
3.8219755
3.9370568
4.279015
3.8470995
3.143919
4.0070662
2.8145025
3.2418168
2.6417022
2.5455596
2.5386136
2.9806478
2.464143
2.695445
2.77966
2.660437
2.5104318
2.1141922
2.575287
2.652555
2.2257557
1.6332338
2.197292
2.5026898
2.235608
2.5135107
2.0827596
2.3062208
2.1084704
2.7160363
2.2956934
1.895342
2.4654255
1.4585341
1.8058511
2.1038103
2.2549686
1.8889035
1.159917
1.8825428
1.3570154
2.0904365
1.7879764
2.1992357
1.1618667
1.7638595
1.8645813
1.325462
1.4065607
1.6055588
1.3233048
1.517059
1.5202172
1.8770517
1.925462
1.5864878
1.1498439
2.2059522
1.4388951
1.6337367
1.796724
1.7070689
1.415786
1.499295
1.9212793
1.7979361
1.395206
1.8703269
1.6748674
1.5769141
1.1558908
1.4059078
1.0090175
1.4571388
1.0135617
1.3928463
1.0050179
1.2536724
1.1816459
1.4630802
1.5409964
1.3632557
1.6082234
1.6162393
1.3412174
1.4676079
1.820896
1.8612

In [22]:
# Check how well the model works on the test data.
# tf.argmax returns the index of the largest value along the given axis;
# axis 1 runs over the class scores within each row.

# 1. The predicted class is the index of the largest logit.
predictions = tf.argmax(logits, 1)
# 2. Compare each prediction with the class index stored in the label.
expected_classes = tf.argmax(y_input, 1)
correct_predictions = tf.equal(predictions, expected_classes)
# 3. Accuracy is the mean of the matches after casting them to float.
correct_as_float = tf.cast(correct_predictions, tf.float32)
accuracy_operation = tf.reduce_mean(correct_as_float)

test_images = mnist_data.test.images
test_labels = mnist_data.test.labels
test_feed = {x_input: test_images, y_input: test_labels}
accuracy_value = session.run(accuracy_operation, feed_dict=test_feed)
print('Accuracy : ', accuracy_value)
session.close()

Accuracy :  0.812


## 다중 레이어 컨볼루션 신경망 구축하기

In [23]:
def add_variable_summary(tf_variable, summary_name):
    """Register tf.summary statistics describing ``tf_variable``.

    Adds scalar summaries for the mean, standard deviation, maximum and
    minimum of the tensor, plus a histogram of its values, so they can be
    visualized. All ops are created inside the name scope
    ``summary_name + '_summary'``.

    Args:
        tf_variable: Tensor whose values are summarized.
        summary_name: Prefix of the name scope that holds the summary ops.
    """
    scope_name = summary_name + '_summary'
    with tf.name_scope(scope_name):
        mean = tf.reduce_mean(tf_variable)
        tf.summary.scalar('Mean', mean)
        with tf.name_scope('standard_deviation'):
            # Square root of the mean squared deviation from the mean.
            squared_deviation = tf.square(tf_variable - mean)
            standard_deviation = tf.sqrt(tf.reduce_mean(squared_deviation))
        tf.summary.scalar('StandardDeviation', standard_deviation)
        maximum = tf.reduce_max(tf_variable)
        tf.summary.scalar('Maximum', maximum)
        minimum = tf.reduce_min(tf_variable)
        tf.summary.scalar('Minimum', minimum)
        tf.summary.histogram('Histogram', tf_variable)

In [24]:
# Reshape each flat input row into a 2-D, single-channel image:
# [N, 28, 28, 1], where -1 lets N be derived from the input.
image_shape = [-1, 28, 28, 1]
x_input_reshape = tf.reshape(x_input, image_shape, name='input_reshape')

In [25]:
def convolution_layer(input_layer, filters, kernel_size=(3, 3),
                      activation=tf.nn.relu):
    """Apply a 2-D convolution to ``input_layer`` and record its summaries.

    Args:
        input_layer: Input tensor passed to ``tf.layers.conv2d``.
        filters: Number of convolution filters.
        kernel_size: Size of the convolution window. The default is an
            immutable tuple rather than a list so that the shared default
            object can never be altered between calls; callers may still
            pass a list.
        activation: Activation function applied to the convolution output.

    Returns:
        The output tensor of the convolution layer.
    """
    layer = tf.layers.conv2d(
        inputs=input_layer,
        filters=filters,
        kernel_size=kernel_size,
        activation=activation
    )
    # Record statistics of the layer output under the 'convolution' prefix.
    add_variable_summary(layer, 'convolution')
    return layer

In [26]:
def pooling_layer(input_layer, pool_size=(2, 2), strides=2):
    """Apply 2-D max pooling to ``input_layer`` and record its summaries.

    Args:
        input_layer: Input tensor passed to ``tf.layers.max_pooling2d``.
        pool_size: Size of the pooling window. The default is an immutable
            tuple rather than a list so that the shared default object can
            never be altered between calls; callers may still pass a list.
        strides: Stride of the pooling operation.

    Returns:
        The output tensor of the pooling layer.
    """
    layer = tf.layers.max_pooling2d(
        inputs=input_layer,
        pool_size=pool_size,
        strides=strides
    )
    # Record statistics of the layer output under the 'pooling' prefix.
    add_variable_summary(layer, 'pooling')
    return layer

In [27]:
def dense_layer(input_layer, units, activation=tf.nn.relu):
    """Apply a fully-connected layer and record summaries of its output.

    Args:
        input_layer: Input tensor passed to ``tf.layers.dense``.
        units: Number of output units.
        activation: Activation function applied to the layer output.

    Returns:
        The output tensor of the dense layer.
    """
    dense_output = tf.layers.dense(input_layer, units, activation=activation)
    add_variable_summary(dense_output, 'dense')
    return dense_output

In [28]:
# Applying n filters produces a feature map with n channels.
# The output shape of each layer is noted on the line above it.

# N x 26 x 26 x 64
convolution_layer_1 = convolution_layer(x_input_reshape, filters=64)
# N x 13 x 13 x 64
pooling_layer_1 = pooling_layer(convolution_layer_1)
# N x 11 x 11 x 128
convolution_layer_2 = convolution_layer(pooling_layer_1, filters=128)
# N x 5 x 5 x 128 (the last row/column is cut off)
pooling_layer_2 = pooling_layer(convolution_layer_2)
# Flatten the pooling output so it can be passed to the dense layer.
flattened_size = 5 * 5 * 128
flattened_pool = tf.reshape(
    pooling_layer_2,
    [-1, flattened_size],
    name='flattened_pool',
)
dense_layer_bottleneck = dense_layer(flattened_pool, units=1024)

In [29]:
# Dropout is a regularization method that removes randomly chosen units
# on each training step. dropout_bool is fed at run time to tell the
# layer whether it is being run for training.
dropout_bool = tf.placeholder(tf.bool)
dropout_layer = tf.layers.dropout(
    dense_layer_bottleneck,
    rate=0.4,
    training=dropout_bool,
)

In [30]:
# The output layer must produce raw, unbounded scores because the loss
# applies softmax to the logits itself. dense_layer defaults to ReLU, which
# would clamp every negative score to zero, so the activation is disabled.
logits = dense_layer(dropout_layer, no_classes, activation=None)

In [31]:
with tf.name_scope('loss'):
    # Softmax cross entropy between the logits and the one-hot labels.
    softmax_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits,
        labels=y_input,
    )
    # The mean cross entropy is the loss; it is also logged as a summary.
    loss_operation = tf.reduce_mean(softmax_cross_entropy, name='loss')
    tf.summary.scalar('loss', loss_operation)

In [32]:
# Minimize the loss function with the Adam optimizer.
with tf.name_scope('optimizer'):
    adam = tf.train.AdamOptimizer()
    optimizer = adam.minimize(loss_operation)

In [33]:
# Accuracy: the mean of the prediction/label matches.
with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
        predictions = tf.argmax(logits, 1)
        expected_classes = tf.argmax(y_input, 1)
        correct_predictions = tf.equal(predictions, expected_classes)
    with tf.name_scope('accuracy'):
        correct_as_float = tf.cast(correct_predictions, tf.float32)
        accuracy_operation = tf.reduce_mean(correct_as_float)
# Kept as the final expression so the cell still displays the summary tensor.
tf.summary.scalar('accuracy', accuracy_operation)

<tf.Tensor 'accuracy_1:0' shape=() dtype=string>

In [34]:
# Open a new session and run the initializer for the global variables.
session = tf.Session()
init_operation = tf.global_variables_initializer()
session.run(init_operation)

In [35]:
# Merge all the summaries registered so far into a single op.
merged_summary_operation = tf.summary.merge_all()
# The training writer is also given the session graph; the test writer
# is created with only its log directory.
train_log_dir = '/tmp/train'
test_log_dir = '/tmp/test'
train_summary_writer = tf.summary.FileWriter(train_log_dir, session.graph)
test_summary_writer = tf.summary.FileWriter(test_log_dir)

In [36]:
test_images = mnist_data.test.images
test_labels = mnist_data.test.labels
# The evaluation feed is the same on every use, so it is built once.
test_feed = {
    x_input: test_images,
    y_input: test_labels,
    dropout_bool: False,
}

for batch_no in range(total_batches):
    mnist_batch = mnist_data.train.next_batch(batch_size)
    train_images = mnist_batch[0]
    train_labels = mnist_batch[1]
    train_feed = {
        x_input: train_images,
        y_input: train_labels,
        dropout_bool: True,
    }
    # One optimization step; the merged summaries come from the same run.
    _, merged_summary = session.run(
        [optimizer, merged_summary_operation], feed_dict=train_feed)
    train_summary_writer.add_summary(merged_summary, batch_no)

    # Every 10th batch, also record summaries computed on the test data.
    if batch_no % 10 == 0:
        merged_summary, _ = session.run(
            [merged_summary_operation, accuracy_operation],
            feed_dict=test_feed)
        test_summary_writer.add_summary(merged_summary, batch_no)