In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as keras
# Pin TensorFlow's global random seed for more repeatable runs.
tf.random.set_seed(seed=0)

# MNIST
MNIST는 손으로 쓴 숫자 이미지들로 이루어진 데이터셋이다. 60,000개의 학습 데이터와 10,000개의 테스트 데이터로 구성되어 있으며, 각 입력은 $28 \times 28$ 픽셀의 크기를 갖는다.

In [None]:
# Load the MNIST train/test split bundled with Keras.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
# The images arrive as uint8 intensities in [0, 255]; rescale them to float32
# in [0, 1] so the networks trained later see well-conditioned inputs.
# Shapes are unchanged, so plotting and reshaping keep working as before.
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
print(x_train.shape)

In [None]:
# Preview the first 16 training digits in a 4x4 grid, each captioned with its label.
fig = plt.figure(figsize=(8, 8))
for idx, (image, label) in enumerate(zip(x_train[:16], y_train[:16])):
    ax = fig.add_subplot(4, 4, idx + 1)
    # Hide ticks and grid lines so that only the digit itself is visible.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(image, cmap=plt.cm.binary)
    ax.set_xlabel(label)
plt.show()

# Fully Connected Network (FCN)
FCN은 한 층의 모든 뉴런이 직전 층의 모든 뉴런과 연결되어 있는 `Dense` 레이어만을 이용하여 구성한 네트워크이다.

## 모델 구성

In [None]:
# Fully connected classifier: flatten each 28x28 image, pass it through one
# 128-unit ReLU hidden layer, and emit a softmax over the 10 digit classes.
fcn = keras.Sequential()
fcn.add(keras.layers.Flatten(input_shape=(28, 28)))
fcn.add(keras.layers.Dense(128, activation='relu'))
fcn.add(keras.layers.Dense(10, activation='softmax'))
fcn.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
fcn.summary()

## 학습

In [None]:
# Train the fully connected network for five passes over the training set.
fcn.fit(x=x_train, y=y_train, epochs=5)

## 검증

In [None]:
# Score the trained FCN on the test split and report loss and accuracy.
test_loss, test_acc = fcn.evaluate(x=x_test, y=y_test)
print(f'Test loss: {test_loss}', f'Test accuracy: {test_acc}', sep='\n')

## 시각화

In [None]:
# Layout of the prediction gallery: 5 rows by 3 columns of test samples.
n_rows, n_cols = 5, 3
n_images = n_cols * n_rows

In [None]:
# Softmax outputs of the FCN for the first n_images test digits.
sample_images = x_test[:n_images]
predictions = fcn.predict(sample_images)

In [None]:
# For every sampled test digit draw two panels side by side: the image,
# captioned "predicted (actual)", and a bar chart of the class probabilities.
fig = plt.figure(figsize=(4 * n_cols, 2 * n_rows))
grid_cols = 2 * n_cols  # each sample occupies two adjacent grid cells
for i in range(n_images):
    prob = predictions[i]
    pred = np.argmax(prob)
    ground_truth = y_test[i]
    # Blue captions mark a correct prediction, red a wrong one.
    caption_color = 'blue' if pred == ground_truth else 'red'

    # plot image
    image_ax = fig.add_subplot(n_rows, grid_cols, 2 * i + 1)
    image_ax.grid(False)
    image_ax.set_xticks([])
    image_ax.set_yticks([])
    image_ax.imshow(x_test[i], cmap=plt.cm.binary)
    image_ax.set_xlabel(f'{pred} ({ground_truth})', color=caption_color)

    # plot prediction
    prob_ax = fig.add_subplot(n_rows, grid_cols, 2 * i + 2)
    prob_ax.grid(False)
    prob_ax.set_xticks([])
    prob_ax.set_yticks([])
    prob_ax.set_ylim(0, 1)
    bars = prob_ax.bar(range(10), prob, color='grey')
    # Colour the predicted class red first, then the true class blue, so a
    # correct prediction ends up as a single blue bar.
    bars[pred].set_color('red')
    bars[ground_truth].set_color('blue')
    prob_ax.set_xlabel(f'{100 * np.max(prob):.0f}%', color=caption_color)
plt.show()

# Convolutional Neural Network (CNN)
CNN은 동물의 시각 피질에서 영감을 받아 디자인된 네트워크로, 각 뉴런이 입력의 전체가 아닌 특정 부분만을 보고 출력을 계산하는 층들로 구성된다.

## 모델 구성

In [None]:
# Small convolutional classifier: two 3x3 'same'-padded convolutions (6 and
# then 16 filters), each followed by average pooling, then a 100-unit ReLU
# dense layer and a softmax over the 10 digit classes.
cnn = keras.Sequential()
cnn.add(keras.layers.Conv2D(
    6,
    kernel_size=3,
    strides=1,
    activation='relu',
    input_shape=(28, 28, 1),
    padding='same',
))
cnn.add(keras.layers.AveragePooling2D())
cnn.add(keras.layers.Conv2D(
    16,
    kernel_size=3,
    strides=1,
    activation='relu',
    padding='same',
))
cnn.add(keras.layers.AveragePooling2D())
cnn.add(keras.layers.Flatten())
cnn.add(keras.layers.Dense(100, activation='relu'))
cnn.add(keras.layers.Dense(10, activation='softmax'))
cnn.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
cnn.summary()

## 학습

In [None]:
# The first Conv2D layer is declared with input_shape=(28, 28, 1), so add a
# trailing single-channel axis to the images before training for five epochs.
cnn.fit(x=x_train.reshape(-1, 28, 28, 1), y=y_train, epochs=5)

## 검증

In [None]:
# Score the trained CNN on the test split (reshaped to carry a channel axis)
# and report loss and accuracy.
test_loss, test_acc = cnn.evaluate(
    x=x_test.reshape(-1, 28, 28, 1),
    y=y_test,
    verbose=2,
)
print(f'Test loss: {test_loss}', f'Test accuracy: {test_acc}', sep='\n')

## 시각화

In [None]:
# Layout of the CNN prediction gallery: 5 rows by 3 columns of test samples.
n_rows, n_cols = 5, 3
n_images = n_cols * n_rows

In [None]:
# Softmax outputs of the CNN for the first n_images test digits, reshaped to
# carry the single channel axis the network was declared with.
sample_batch = x_test[:n_images].reshape(-1, 28, 28, 1)
predictions = cnn.predict(sample_batch)

In [None]:
# For every sampled test digit draw two panels side by side: the image,
# captioned "predicted (actual)", and a bar chart of the class probabilities.
fig = plt.figure(figsize=(4 * n_cols, 2 * n_rows))
grid_cols = 2 * n_cols  # each sample occupies two adjacent grid cells
for i in range(n_images):
    prob = predictions[i]
    pred = np.argmax(prob)
    ground_truth = y_test[i]
    # Blue captions mark a correct prediction, red a wrong one.
    caption_color = 'blue' if pred == ground_truth else 'red'

    # plot image
    image_ax = fig.add_subplot(n_rows, grid_cols, 2 * i + 1)
    image_ax.grid(False)
    image_ax.set_xticks([])
    image_ax.set_yticks([])
    image_ax.imshow(x_test[i], cmap=plt.cm.binary)
    image_ax.set_xlabel(f'{pred} ({ground_truth})', color=caption_color)

    # plot prediction
    prob_ax = fig.add_subplot(n_rows, grid_cols, 2 * i + 2)
    prob_ax.grid(False)
    prob_ax.set_xticks([])
    prob_ax.set_yticks([])
    prob_ax.set_ylim(0, 1)
    bars = prob_ax.bar(range(10), prob, color='grey')
    # Colour the predicted class red first, then the true class blue, so a
    # correct prediction ends up as a single blue bar.
    bars[pred].set_color('red')
    bars[ground_truth].set_color('blue')
    prob_ax.set_xlabel(f'{100 * np.max(prob):.0f}%', color=caption_color)
plt.show()