# Feed-forward neural network
- a.k.a. Multi-Layer Perceptrons (MLP), Fully-connected network

In [None]:
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

## 0. Load MNIST data

In [None]:
# Load the MNIST dataset object through the TensorFlow tutorial helper, using
# the "MNIST_idx3/" directory; one_hot=True requests one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_idx3/", one_hot=True)

In [None]:
# Pull the image and label arrays out of each dataset split.
X_trn = mnist.train.images
Y_trn = mnist.train.labels

X_val = mnist.validation.images
Y_val = mnist.validation.labels

X_test = mnist.test.images
Y_test = mnist.test.labels

In [None]:
# Number of examples in each split, read from the first axis of the labels.
num_trn, num_val, num_test = (
    Y_trn.shape[0],
    Y_val.shape[0],
    Y_test.shape[0],
)

print("Number of training points: ", num_trn)
print("Number of validation points: ", num_val)
print("Number of test points: ", num_test)

In [None]:
# Per-example widths of the inputs and the labels (second axis of each array).
dim_X, dim_Y = X_trn.shape[1], Y_trn.shape[1]

# np.sqrt returns a floating-point value, so cast it to int to obtain the side
# length of the image (this treats the flattened image as square).
pixel_X = int(np.sqrt(dim_X))

print("Dimension of X: %d (%d x %d)" % (dim_X, pixel_X, pixel_X))
print("Dimension of Y: ", dim_Y)

## 1. Build the graph
Tensorflow에서는 모델을 'graph'로 구현한다.

### 1.1. Placeholder for inputs and outputs
- Shape of the placeholder for inputs: [batch_size, input_dimension]
- Shape of the placeholder for outputs: [batch_size, output_dimension]
- Placeholder의 batch_size를 None으로 두면 크기가 고정되지 않으므로, 실행 시 feed_dict로 넣는 데이터의 batch size를 자유롭게 바꿀 수 있음

In [None]:
# Input placeholder: float32 with shape [batch, dim_X]; the batch axis is None.
X = tf.placeholder(dtype=tf.float32, shape=[None, dim_X])

In [None]:
# Label placeholder: float32 with shape [batch, dim_Y]; the batch axis is None.
Y = tf.placeholder(dtype=tf.float32, shape=[None, dim_Y])

### 1.2. Add layers and variables

여기에서는 2개의 hidden layers를 생성하겠습니다.

### Hidden layer의 차원 정의 (hidden_dim_1, hidden_dim_2)

In [None]:
# Number of units in the first and second hidden layers, respectively.
hidden_dim_1, hidden_dim_2 = 784, 300

### Weights와 biases 정의

- Input layer와 hidden layer 1 사이의 weights, biases (W_h1, b_h1)
- Hidden layer 1과 hidden layer 2 사이의 weights, biases (W_h2, b_h2)
- Hidden layer 2와 output layer 사이의 weights, biases (W_o, b_o)



In [None]:
# Trainable parameters of the three affine layers.
# Weights are sampled from a truncated normal (mean 0.0, stddev 0.01) and
# biases start at zero. Variables are created in the same order as the layers.

# Input layer -> hidden layer 1
W_h1 = tf.Variable(
    tf.truncated_normal([dim_X, hidden_dim_1], mean=0.0, stddev=0.01)
)
b_h1 = tf.Variable(tf.zeros(shape=[hidden_dim_1]))

# Hidden layer 1 -> hidden layer 2
W_h2 = tf.Variable(
    tf.truncated_normal([hidden_dim_1, hidden_dim_2], mean=0.0, stddev=0.01)
)
b_h2 = tf.Variable(tf.zeros(shape=[hidden_dim_2]))

# Hidden layer 2 -> output layer
W_o = tf.Variable(
    tf.truncated_normal([hidden_dim_2, dim_Y], mean=0.0, stddev=0.01)
)
b_o = tf.Variable(tf.zeros(shape=[dim_Y]))

### 모델 생성
- Activation function: tf.nn.relu

In [None]:
# Forward pass: two ReLU hidden layers followed by a linear output layer.
pre_act_1 = tf.matmul(X, W_h1) + b_h1
h1 = tf.nn.relu(pre_act_1)

pre_act_2 = tf.matmul(h1, W_h2) + b_h2
h2 = tf.nn.relu(pre_act_2)

# Raw class scores; no softmax is applied at this point.
logits = tf.matmul(h2, W_o) + b_o

In [None]:
# Show the tensor objects for each layer output, one per line.
print(h1, h2, logits, sep="\n")

In [None]:
# Show the static shape of each layer output, one per line.
print(h1.get_shape(), h2.get_shape(), logits.get_shape(), sep="\n")

## 2. Training

### 2.1. Loss function
- Classification 문제에서 제일 많이 사용하는 loss function은 **cross-entropy**

Cross-entropy $= - \sum_{j} y_j \log(y'_j)$, where $y_j$ is the one-hot true label and $y'_j$ is the softmax probability predicted for class $j$.

In [None]:
# Softmax cross-entropy between the labels Y and the network's logits.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits)

In [None]:
# Training loss: the mean of the cross-entropy tensor.
cost = tf.reduce_mean(input_tensor=cross_entropy)

### 2.2. Training operator
- First, define the optimizer (**optimizer**).
- Then, define the training operator (**train_op**).

In [None]:
# Gradient descent optimizer with a learning rate of 0.01.
optimizer = tf.train.GradientDescentOptimizer(0.01)

In [None]:
# Op that performs one optimizer update to reduce the cost.
train_op = optimizer.minimize(loss=cost)

In [None]:
# The optimizer and the training op are often created in a single statement:
# train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

In [None]:
# Show the optimizer object and the training op, separated by a rule of "=".
print(optimizer, "=" * 80, train_op, sep="\n")

### 2.3. Predicting operator
- **correct_prediction**: boolean (True or False)
- **accuracy**: 먼저 correct_prediction을 float32로 변환 후에 배치 내 평균을 계산

In [None]:
# tf.argmax along axis 1 picks the index of the largest entry in each row.
# Softmax is monotonic, so the argmax of the logits equals the argmax of the
# softmax probabilities; no explicit softmax is needed to get the prediction.
predicted_class = tf.argmax(logits, 1)
true_class = tf.argmax(Y, 1)

# Boolean tensor: True where the predicted class matches the label.
correct_prediction = tf.equal(predicted_class, true_class)

In [None]:
# Cast the boolean matches to float32 and average them: fraction correct.
hits = tf.cast(correct_prediction, dtype=tf.float32)
accuracy = tf.reduce_mean(hits)

In [None]:
# Show the tensor objects for the per-example match and the accuracy.
print(correct_prediction, accuracy, sep="\n")

### 2.4. Initializer

In [None]:
# Op that initializes the global variables; it takes effect only once it is
# run inside a session.
init = tf.global_variables_initializer()

### 2.5. Run the session
- 앞서 만든 graph, operator 등을 돌리는 과정

In [None]:
# Open the session used by the following cells; it is closed explicitly with
# sess.close() in the final cell.
sess = tf.Session()

In [None]:
# Run the initializer op in the session before any training step.
sess.run(init)

In [None]:
# Mini-batch training; validation accuracy is reported at a fixed interval.
num_steps = 10000
batch_size = 100
eval_every = 500

for step in range(1, num_steps + 1):
    batch_xs, batch_ys = mnist.train.next_batch(batch_size)
    sess.run(train_op, feed_dict={X: batch_xs, Y: batch_ys})

    # Only evaluate on every `eval_every`-th step.
    if step % eval_every != 0:
        continue

    # Accuracy over the whole validation split.
    val_acc = sess.run(accuracy, feed_dict={X: X_val, Y: Y_val})
    print("[%d step] validation accuracy: %g" % (step, val_acc))

In [None]:
# Accuracy on the test split. X_test / Y_test are the same arrays as
# mnist.test.images / mnist.test.labels (assigned when the data was unpacked).
test_feed = {X: X_test, Y: Y_test}
print(sess.run(accuracy, feed_dict=test_feed))

In [None]:
# Bare expression: as the last statement of a notebook cell, its value (the
# Python type of the `accuracy` object) is displayed as the cell output.
type(accuracy)

In [None]:
# Close the session opened earlier; `sess` must not be used after this call.
sess.close()