In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pylab as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
# Load the MNIST dataset from the local "MNIST_data/" directory.
# one_hot=True requests labels encoded as one-hot vectors.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(train_dir="MNIST_data/", one_hot=True)

In [None]:
# Preview the first 32 training digits on a 4 x 8 grid, titled with their class.
n_rows, n_cols = 4, 8
n_preview = n_rows * n_cols

plt.figure(figsize=(16, 8))
preview_pairs = zip(mnist.train.images[:n_preview], mnist.train.labels[:n_preview])
for position, (pixels, one_hot_label) in enumerate(preview_pairs, start=1):
    plt.subplot(n_rows, n_cols, position)
    # Each image is stored flat; reshape it to 28 x 28 for display.
    plt.imshow(pixels.reshape(28, 28), cmap='gray')
    plt.axis('off')
    # The class index is the position of the largest entry in the label vector.
    plt.title(np.argmax(one_hot_label))
plt.show()

In [None]:
# Report the pixel value range of the first training image.
first_image = mnist.train.images[0]
print("Min : {0}, Max : {1}".format(first_image.min(), first_image.max()))

## Multi-class Classification
$$
\min_{W,b} \frac{1}{m}\sum_{i=1}^m\sum_{k=1}^{10} -y_i^k \log(\hat{y_i^k})
$$
where $\hat{y_i^k} = softmax((\sigma(xW_1+b_1)) W_2 + b_2)_k$

1. Forward Model : $\hat{y_i^k} = softmax((\sigma(xW_1+b_1)) W_2 + b_2)_k$
1. Cross-Entropy : $\frac{1}{m}\sum_{i=1}^m\sum_{k=1}^{10} -y_i^k \log(\hat{y_i^k})$



### 01. Model에 맞는 feature 정리
For $i=1,2,\cdots,m$,
$$
\texttt{features}_i = [\texttt{pixel}^i_1, \texttt{pixel}^i_2,\cdots, \texttt{pixel}^i_{784}]
$$

In [None]:
# Training inputs and their labels, kept under short names for the
# full-training-set loss evaluation later on.
features, labels = mnist.train.images, mnist.train.labels
print(features.shape)

### 02. Model TensorFlow로 정의하기

- TODO1 : `features`를 `feed`할 placeholder(`x`)를 정의
- TODO2 : Weight(`W_hidden`, `W_out`)와 bias(`b_hidden`, `b_out`)를 `Variable`로, `tf.random_normal` 초기값으로 정의
- TODO3 & TODO4: Model(`model`) 정의
- `num_hidden` : 20
$$
\hat{y_i^k} = softmax((\sigma(xW_1+b_1)) W_2 + b_2)_k
$$

In [None]:
# TODO1: placeholder for a batch of flattened images (784 = 28 * 28 pixels).
x = tf.placeholder(tf.float32, [None, 784])

# TODO2: parameters of the hidden layer and the output layer.
# Weights use a random-normal initial value, like the biases, so the hidden
# units do not all start out identical.
num_hidden = 20
W_hidden = tf.Variable(tf.random_normal([784, num_hidden]))
b_hidden = tf.Variable(tf.random_normal([num_hidden]))

W_out = tf.Variable(tf.random_normal([num_hidden, 10]))
b_out = tf.Variable(tf.random_normal([10]))

# TODO3: hidden activation, sigma(x W_1 + b_1).
hidden = tf.nn.sigmoid(tf.matmul(x, W_hidden) + b_hidden)
# TODO4: class probabilities, softmax(hidden W_2 + b_2).
model = tf.nn.softmax(tf.matmul(hidden, W_out) + b_out)

# Sanity-check the tensor shapes of the graph that was just built.
print(x.shape)
print(W_hidden.shape)
print(b_hidden.shape)
print(W_out.shape)
print(b_out.shape)
print(model)

### 03. Cross-Entropy 정의하기

$$
\frac{1}{m}\sum_{i=1}^m\sum_{k=1}^{10} -y_i^k \log(\hat{y_i^k})
$$

In [None]:
# Placeholder for a batch of target label vectors (10 classes).
y = tf.placeholder(tf.float32, [None, 10])

# Cross-entropy: sum -y * log(y_hat) over the classes, then average over the batch.
# `model` is a softmax output and can underflow to exactly 0; log(0) is -inf and
# 0 * -inf is NaN, which would poison the loss and every gradient. Clipping the
# probabilities away from 0 keeps the loss finite.
clipped_model = tf.clip_by_value(model, 1e-10, 1.0)
loss = tf.reduce_mean(tf.reduce_sum(-y * tf.log(clipped_model), 1))

print(loss)

In [None]:
# Accuracy: fraction of samples whose most probable predicted class matches
# the class marked in the target vector.
true_class = tf.argmax(y, 1)
predicted_class = tf.argmax(model, 1)
correct_prediction = tf.equal(true_class, predicted_class)
# Cast the boolean matches to 0.0 / 1.0 so their mean is the hit rate.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

### 06. Stochastic Gradient Descent에 사용할 Hyper-Parameter 설정
1. Optimizer : `AdamOptimizer`
1. `batch_size` : `128`
1. `learning rate` : `0.01`
1. `MaxEpochs` : `21`
1. Train Step

In [None]:
# Hyper-parameters for mini-batch training.
lr = 0.01          # learning rate handed to Adam
batch_size = 128   # samples per gradient step
MaxEpochs = 21     # number of outer training loops

# Adam optimizer and the op that applies one update step on `loss`.
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
train = optimizer.minimize(loss)

### 07. Stochastic Gradient Descent(SGD)
- `mnist.train.next_batch()`사용하여 batch 생성하기
- batch만 사용하여, train 하기

In [None]:
# Create a session and give every variable its initial value.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Number of mini-batch steps per epoch, and the fixed feeds used for monitoring.
steps_per_epoch = len(mnist.train.images) // batch_size + 1
train_feed = {x: features, y: labels}
test_feed = {x: mnist.test.images, y: mnist.test.labels}

for epoch in range(MaxEpochs):
    for step in range(steps_per_epoch):
        # One optimizer update on the next mini-batch.
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        sess.run(train, feed_dict={x: batch_xs, y: batch_ys})
        if step % 200 != 0:
            continue
        # Every 200 steps: loss on the full training set, accuracy on the test set.
        curr_loss = sess.run(loss, feed_dict=train_feed)
        curr_acc = sess.run(accuracy, feed_dict=test_feed)
        print(epoch, step, curr_loss, curr_acc)
    print("")

## Prediction

In [None]:
# Pick one test sample and run it through the trained model.
test_id = 9229
test_image = mnist.test.images[test_id]
test_label = mnist.test.labels[test_id]

# The placeholder expects a batch, so wrap the single image in a list.
out = sess.run(model, feed_dict={x: [test_image]})
predicted, actual = np.argmax(out), np.argmax(test_label)
print("Predicted Label : {0} (Real Label : {1})".format(predicted, actual))

In [None]:
# Show the chosen test digit with the predicted and the real class in the title.
label = np.argmax(test_label)
img = test_image.reshape(28, 28)
title_text = "Predicted Label : {0} (Real Label : {1})".format(np.argmax(out), label)

plt.imshow(img, cmap='gray')
plt.title(title_text)
plt.axis('off')
plt.show()

## MSE
이번엔 아래와 같은 loss function을 사용하여, SGD를 구현해보세요.

$$
E = \frac{1}{m}\sum_{i=1}^m \sum_{k=1}^{10} |\hat{y_i^k}-y_i^k|^2
$$

In [None]:
# --- Data ---------------------------------------------------------------
features, labels = mnist.train.images, mnist.train.labels

# --- Model: 784 -> sigmoid(num_hidden) -> softmax(10) ---------------------
x = tf.placeholder(tf.float32, [None, 784])
num_hidden = 20
W_hidden = tf.Variable(tf.random_normal([784, num_hidden]))
b_hidden = tf.Variable(tf.random_normal([num_hidden]))

W_out = tf.Variable(tf.random_normal([num_hidden, 10]))
b_out = tf.Variable(tf.random_normal([10]))

hidden_pre_activation = tf.matmul(x, W_hidden) + b_hidden
hidden = tf.nn.sigmoid(hidden_pre_activation)
output_pre_activation = tf.matmul(hidden, W_out) + b_out
model = tf.nn.softmax(output_pre_activation)

# --- Loss: squared error summed over classes, averaged over the batch -----
y = tf.placeholder(tf.float32, [None, 10])

squared_error = tf.square(model - y)
loss = tf.reduce_mean(tf.reduce_sum(squared_error, 1))

# --- Accuracy: share of samples whose arg-max class matches the target ----
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(model, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# --- Optimizer ------------------------------------------------------------
lr = 0.01
batch_size = 128
MaxEpochs = 21
optimizer = tf.train.AdamOptimizer(lr)
train = optimizer.minimize(loss)

# --- Training -------------------------------------------------------------
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

steps_per_epoch = len(mnist.train.images) // batch_size + 1
train_feed = {x: features, y: labels}
test_feed = {x: mnist.test.images, y: mnist.test.labels}

for epoch in range(MaxEpochs):
    # Exercise TODO 10: iterate over the mini-batches of one epoch.
    for step in range(steps_per_epoch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # Exercise TODO 11: one optimizer update using only this mini-batch.
        sess.run(train, feed_dict={x: batch_xs, y: batch_ys})
        if step % 200 != 0:
            continue
        # Every 200 steps: loss on the full training set, accuracy on the test set.
        curr_loss = sess.run(loss, feed_dict=train_feed)
        curr_acc = sess.run(accuracy, feed_dict=test_feed)
        print(epoch, step, curr_loss, curr_acc)
    print("")