In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pylab as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [None]:
# Load the MNIST data set with one_hot=False to look at the label format
# produced without one-hot encoding.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=False)
# Display the label of the first training example as the cell output.
mnist.train.labels[0]

In [None]:
# Reload the MNIST data set with one_hot=True. This rebinds `mnist`, so the
# cells below work with this version (they apply np.argmax to the labels).
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Display the label of the first training example as the cell output.
mnist.train.labels[0]

In [None]:
from collections import Counter

# Count how often each digit occurs among the training labels. A single
# argmax over axis 1 recovers the digit index of every label row at once.
freq = Counter(np.argmax(mnist.train.labels, axis=1).tolist())
for digit in range(10):
    print("label {0} : {1}개".format(digit, freq[digit]))

In [None]:
# Inspect one training example: its shape first, then the raw values.
first_image = mnist.train.images[0]
print(first_image.shape)
print(first_image)

In [None]:
# Preview the first 32 training digits in a 4 x 8 grid, each titled with the
# digit recovered from its label row via argmax.
n_rows, n_cols = 4, 8
plt.figure(figsize=(16, 8))
for idx in range(n_rows * n_cols):
    plt.subplot(n_rows, n_cols, idx + 1)
    plt.imshow(mnist.train.images[idx].reshape(28, 28), cmap='gray')
    plt.title(np.argmax(mnist.train.labels[idx]))
    plt.axis('off')
plt.show()

In [None]:
# Report the value range of the first training image.
sample = mnist.train.images[0]
lo, hi = sample.min(), sample.max()
print("Min : {0}, Max : {1}".format(lo, hi))

## Multi-class Classification
$$
\min_{W,b} \frac{1}{m}\sum_{i=1}^m\sum_{k=1}^{10} -y_i^k \log(\hat{y_i^k})
$$
where $\hat{y_i^k} = softmax(x_i W + b)_k$

1. Forward Model : $\hat{y_i^k} = softmax(x_i W + b)_k$
1. Cross-Entropy : $\frac{1}{m}\sum_{i=1}^m\sum_{k=1}^{10} -y_i^k \log(\hat{y_i^k})$



### 01. Model에 맞는 feature 정리
For $i=1,2,\cdots,m$,
$$
\texttt{features}_i = [\texttt{pixel}^i_1, \texttt{pixel}^i_2,\cdots, \texttt{pixel}^i_{784}]
$$

- TODO1 : mnist.train.images가 어떤 구조로 되어 있는지 파악하고, 위의 feature에 맞게 정리가 필요하다면 정리하세요.

In [None]:
# TODO1
# The image array is used directly as the feature matrix (one row per
# example), so no reshaping is done here.
train_set = mnist.train
print(train_set.images.shape)
features, labels = train_set.images, train_set.labels

### 02. Model TensorFlow로 정의하기

- TODO2 : `features`를 `feed`할 placeholder(`x`)를 정의
- TODO3 : Weight(`W`)와 bias(`b`)를 `Variable`로 초기값 0으로 정의
- TODO4 : Model(`model`) 정의

$$
\hat{y_i^k} = softmax(x_i W + b)_k
$$

In [None]:
# Each MNIST image is a flat vector of 28 * 28 = 784 pixels and there are
# 10 digit classes.
n_features = 784
n_classes = 10

# TODO2: placeholder for a batch of feature rows; the batch size stays dynamic.
x = tf.placeholder(tf.float32, shape=(None, n_features))

# TODO3: weight matrix and bias vector, both initialised to zero.
# Variable shapes must be fully defined, so None cannot be used here.
W = tf.Variable(tf.zeros([n_features, n_classes]))
b = tf.Variable(tf.zeros([n_classes]))

# TODO4: forward model softmax(x W + b), giving one probability per class.
model = tf.nn.softmax(tf.matmul(x, W) + b)

print(x.shape)
print(W.shape)
print(b.shape)
print(model)

### 03. Cross-Entropy 정의하기

- TODO5 : `labels`를 `feed`할 `placeholder`(`y`) 정의하기
- TODO6 : Cross-Entropy(`loss`) 정의하기 : `tf.reduce_mean`, `tf.reduce_sum`, `tf.log` 활용

$$
\frac{1}{m}\sum_{i=1}^m\sum_{k=1}^{10} -y_i^k \log(\hat{y_i^k})
$$

In [None]:
# TODO5: placeholder for the one-hot labels, one row of 10 entries per example.
y = tf.placeholder(tf.float32, shape=(None, 10))

# TODO6: cross-entropy. Sum -y * log(y_hat) over the 10 classes (axis 1),
# then average over the examples that are fed.
# NOTE: tf.log returns -inf if a predicted probability underflows to exactly 0.
loss = tf.reduce_mean(tf.reduce_sum(-y * tf.log(model), 1))

print(loss)

### 04. Gradient Descent에 사용할 Hyper-Parameter 설정(TODO 7) 
1. Optimizer : `AdamOptimizer`
1. `learning rate` : `0.01`
1. `MaxEpochs` : `51`
1. Train Step

In [None]:
# TODO7
# Full-batch gradient descent setup: Adam with a fixed learning rate, and the
# train op that minimises the cross-entropy loss.
lr, MaxEpochs = 0.01, 51
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
train = optimizer.minimize(loss)

In [None]:
# Accuracy for evaluation: the fraction of rows where the arg-max of the model
# output equals the arg-max of the one-hot label.
predicted_class = tf.argmax(model, 1)
true_class = tf.argmax(y, 1)
correct_prediction = tf.equal(true_class, predicted_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

### 05. Gradient Descent

- TODO8 : batch 사용하지 않고 모든 features와 labels을 다 feed 해줍니다.

In [None]:
# Start from freshly initialised variables in a new session.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# The same two feeds are reused on every iteration.
train_feed = {x: features, y: labels}
test_feed = {x: mnist.test.images, y: mnist.test.labels}

for epoch in range(MaxEpochs):
    # TODO 8: one update per epoch using the entire training set (no batching).
    sess.run(train, feed_dict=train_feed)
    if epoch % 5:
        continue  # report only on every 5th epoch
    curr_W, curr_b, curr_loss = sess.run([W, b, loss], feed_dict=train_feed)
    curr_acc = sess.run(accuracy, feed_dict=test_feed)
    print(epoch, curr_loss, curr_acc)

### 06. Stochastic Gradient Descent에 사용할 Hyper-Parameter 설정(TODO 9) 
1. Optimizer : `AdamOptimizer`
1. `batch_size` : `128`
1. `learning rate` : `0.01`
1. `MaxEpochs` : `5`
1. Train Step

In [None]:
# TODO9
# Mini-batch (SGD) settings. A new optimizer and train op are built for this
# run; the following cell re-initialises all variables before training.
batch_size = 128
lr = 0.01
MaxEpochs = 5
optimizer = tf.train.AdamOptimizer(lr)
train = optimizer.minimize(loss)

### 07. Stochastic Gradient Descent(SGD)
- TODO10 : `mnist.train.next_batch()`사용하여 batch 생성하기
- TODO11 : batch만 사용하여, train 하기

In [None]:
# Re-initialise every variable (including the new optimizer's state) in a
# fresh session so this run starts from zero weights.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for epoch in range(MaxEpochs):
    # TODO 10: draw enough mini-batches to cover the training set about once
    # per epoch.
    for step in range(len(mnist.train.images) // batch_size + 1):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        # TODO 11: update the parameters using this mini-batch only.
        sess.run(train, feed_dict={x: batch_xs, y: batch_ys})
        if step % 50 == 0:
            # Monitor loss on the full training set and accuracy on the test set.
            curr_W, curr_b, curr_loss = sess.run([W, b, loss], feed_dict={x: features, y: labels})
            curr_acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels})
            print(epoch, step, curr_loss, curr_acc)
    print("")
## Prediction

In [None]:
# Pick one test example and run it through the model in the current session.
test_id = 9229
test_image, test_label = mnist.test.images[test_id], mnist.test.labels[test_id]

# The single image is wrapped in a list so it is fed as a batch of one.
out = sess.run(model, feed_dict={x: [test_image]})
predicted_digit = np.argmax(out)
real_digit = np.argmax(test_label)
print("Predicted Label : {0} (Real Label : {1})".format(predicted_digit, real_digit))

In [None]:
# Show the chosen test image with the predicted and true digits in the title.
img = test_image.reshape(28, 28)
label = np.argmax(test_label)
caption = "Predicted Label : {0} (Real Label : {1})".format(np.argmax(out), label)
plt.imshow(img, cmap='gray')
plt.title(caption)
plt.axis('off')
plt.show()

## SSE
이번엔 아래와 같은 loss function을 사용하여, SGD를 구현해보세요.

$$
E = \sum_{i=1}^m \sum_{k=1}^{10} |\hat{y_i^k}-y_i^k|^2
$$

In [None]:
# Softmax classifier trained with a sum-of-squared-errors loss instead of
# cross-entropy. The model and placeholders are redefined here.
features = mnist.train.images
labels = mnist.train.labels

# Inputs: rows of 784 pixel values; the batch size stays dynamic.
x = tf.placeholder(tf.float32, [None, 784])

# Parameters, initialised to zero.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# Forward model: softmax(x W + b).
model = tf.nn.softmax(tf.matmul(x, W) + b)

# Targets: one-hot label rows.
y = tf.placeholder(tf.float32, [None, 10])

# TODO12: SSE, E = sum_i sum_k |y_hat_i^k - y_i^k|^2, summed over every
# example and class that is fed (no averaging, as in the formula).
loss = tf.reduce_sum(tf.square(model - y))

# Fraction of rows whose predicted arg-max matches the label arg-max.
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(model,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Mini-batch training settings.
lr = 0.01
batch_size = 128
MaxEpochs = 5
optimizer = tf.train.AdamOptimizer(lr)
train = optimizer.minimize(loss)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for epoch in range(MaxEpochs):
    for step in range(len(mnist.train.images) // batch_size + 1):
        # Update the parameters using one mini-batch.
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        sess.run(train, feed_dict={x:batch_xs, y:batch_ys})
        if step % 50 == 0:
            # Monitor loss on the full training set and accuracy on the test set.
            curr_W, curr_b, curr_loss = sess.run([W, b, loss], feed_dict={x:features, y:labels})
            curr_acc = sess.run(accuracy, feed_dict={x:mnist.test.images, y:mnist.test.labels})
            print(epoch, step, curr_loss, curr_acc)
    print("")