# Feed-forward neural network (version 2)
- a.k.a. Multi-Layer Perceptrons (MLP), Fully-connected network
- 다른 방식으로 네트워크를 구성해봅시다.

In [None]:
import random
from pprint import pprint
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

## 0. Load MNIST data
- 이번에는 one_hot=False로 데이터를 불러와봅시다.

In [None]:
# Load MNIST with one_hot=False so that labels are returned as integer class
# indices instead of one-hot vectors.
mnist = input_data.read_data_sets(train_dir="MNIST_idx3/", one_hot=False)

In [None]:
# Pull the image matrices (X_*) and label arrays (Y_*) out of each split.
X_trn = mnist.train.images
Y_trn = mnist.train.labels

X_val = mnist.validation.images
Y_val = mnist.validation.labels

X_test = mnist.test.images
Y_test = mnist.test.labels

In [None]:
# Peek at the first five training labels.
print(Y_trn[:5])

In [None]:
# Number of examples in each split, taken from the length of the label arrays.
num_trn, num_val, num_test = (len(labels) for labels in (Y_trn, Y_val, Y_test))

print("Number of training points: ", num_trn)
print("Number of validation points: ", num_val)
print("Number of test points: ", num_test)

In [None]:
# Size of the second axis of the training matrix, i.e. features per example.
dim_X = X_trn.shape[1]
pixel_X = int(np.sqrt(dim_X)) # np.sqrt returns a floating-point value, so cast it to int (assumes the flattened images are square -- TODO confirm)
# Y is loaded with one_hot=False, so it has no second axis (Y_trn.shape[1] does not exist).

print("Dimension of X: %d (%d x %d)" % (dim_X, pixel_X, pixel_X))
# No dimension is printed for Y because the labels are a flat array of integers.
print("Dimension of Y: None.. Y is a array of integers.")

## 1. Build the graph
Tensorflow에서는 모델을 'graph'로 구현한다.

### 1.1. Placeholder for inputs and outputs
- Shape of the placeholder for inputs: [batch_size, input_dimension]
- Shape of the placeholder for outputs: [batch_size]
- Placeholder의 batch_size를 None으로 지정하면 배치 크기가 고정되지 않으며, 실행 시점에 feed하는 데이터의 크기에 따라 배치 크기가 결정됨

In [None]:
# Input placeholder: one row of dim_X features per example; the batch axis is left open.
X = tf.placeholder(dtype=tf.float32, shape=[None, dim_X], name="Inputs")

In [None]:
# Label placeholder: one integer class index per example (not one-hot).
Y = tf.placeholder(dtype=tf.int32, shape=[None], name="Labels")

In [None]:
# Inspect the static shape of the input placeholder.
# Indexing the result (e.g. [1]) yields a single dimension; type(...) shows its class.
test = X.shape
print(test)

### 1.2. Build the model

- Weight와 bias, 그래프 구조를 생성하는 함수를 만들어봅시다.
- 여기에서는 2개의 hidden layers를 생성하겠습니다.

In [None]:
def fully_connected(inputs, hidden_dim_1, hidden_dim_2, num_classes, scope='SimpleFCN', reuse=None):
    """
    [fully_connected] Build a feed-forward network with two ReLU hidden layers.

    Variables are created with tf.get_variable under `scope`, inside the
    sub-scopes 'HiddenLayer_1', 'HiddenLayer_2' and 'OutputLayer'.

    [Args]
      - inputs: placeholder (or tensor) holding the input data; it is assumed
        to be 2-D, [batch_size, input_dim], with a statically known input_dim
      - hidden_dim_1: number of units in the first hidden layer
      - hidden_dim_2: number of units in the second hidden layer
      - num_classes: number of classes to predict (= number of output units)
      - scope: name of the enclosing variable scope (default: "SimpleFCN")
      - reuse: forwarded to tf.variable_scope. Leave as None to create new
        variables; pass True (or tf.AUTO_REUSE) to share the variables of an
        earlier call that used the same scope, e.g. when re-running a cell

    [Returns]
      - logits: un-normalised class scores (output of the last matmul + bias)
      - end_points: dict with the keys 'h1', 'h2' and 'logits' mapping to the
        activation tensors of the two hidden layers and the output layer
    """
    # The weight matrix of the first layer needs the size of the feature axis.
    input_dim = inputs.get_shape()[1]

    def trunc_normal(stddev):
        # Shorthand for a zero-mean truncated-normal initializer.
        return tf.truncated_normal_initializer(mean=0.0, stddev=stddev)

    def constant(value):
        # Shorthand for a constant initializer.
        return tf.constant_initializer(value=value)

    # Intermediate tensors are collected here so callers can inspect them.
    end_points = {}

    with tf.variable_scope(scope, reuse=reuse):
        with tf.variable_scope('HiddenLayer_1'):
            W_h1 = tf.get_variable("weights", [input_dim, hidden_dim_1], initializer=trunc_normal(0.1))
            b_h1 = tf.get_variable("biases", [hidden_dim_1], initializer=constant(0.0))
            h1 = tf.nn.relu(tf.matmul(inputs, W_h1) + b_h1, name="Activation")
            end_points['h1'] = h1

        # Scope name capitalised to match 'HiddenLayer_1'.
        with tf.variable_scope('HiddenLayer_2'):
            # NOTE(review): this layer uses slightly different initializer
            # constants (0.09 / 0.01) than the other layers; kept unchanged.
            W_h2 = tf.get_variable("weights", [hidden_dim_1, hidden_dim_2], initializer=trunc_normal(0.09))
            b_h2 = tf.get_variable("biases", [hidden_dim_2], initializer=constant(0.01))
            h2 = tf.nn.relu(tf.matmul(h1, W_h2) + b_h2, name="Activation")
            end_points['h2'] = h2

        with tf.variable_scope('OutputLayer'):
            W_o = tf.get_variable("weights", [hidden_dim_2, num_classes], initializer=trunc_normal(0.1))
            b_o = tf.get_variable("biases", [num_classes], initializer=constant(0.0))
            # The extra scope only groups the logit ops under '.../Logits'.
            with tf.variable_scope('Logits'):
                logits = tf.matmul(h2, W_o) + b_o
            end_points['logits'] = logits

    return logits, end_points
    

In [None]:
# Instantiate the network on the input placeholder:
# 500 and 300 hidden units, 10 output classes.
logits, end_points = fully_connected(
    inputs=X,
    hidden_dim_1=500,
    hidden_dim_2=300,
    num_classes=10,
)

In [None]:
# Show the symbolic logits tensor; no numeric values exist until it is run in a session.
print(logits)

In [None]:
# Pretty-print the dict of intermediate tensors returned by fully_connected.
pprint(end_points)

In [None]:
# List the names of all global variables that live under the 'SimpleFCN' scope.
variables = tf.get_collection(
    key=tf.GraphKeys.GLOBAL_VARIABLES,
    scope='SimpleFCN',
)
pprint([var.name for var in variables])

## 2. Training

### 2.1. Loss function
- Classification 문제에서 제일 많이 사용하는 loss function은 **cross-entropy**

두 가지 옵션이 있음.
- tf.nn.softmax_cross_entropy_with_logits: Y가 one-hot encoded 되어 있을 때
- tf.nn.sparse_softmax_cross_entropy_with_logits: Y가 class에 대한 index 값일 때

In [None]:
# Mean cross-entropy over the batch; the sparse variant takes integer class
# indices as labels, which matches the Y placeholder.
cost = tf.reduce_mean(
    input_tensor=tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y, logits=logits)
)

### 2.2. Training operator
- First, define the optimizer. (**optimizer**)
- And then, define training operator. (**train_op**)

자주 사용하는 optimizer로는 다음과 같은 것들이 있음.
- tf.train.GradientDescentOptimizer
- tf.train.AdagradOptimizer
- tf.train.MomentumOptimizer
- **tf.train.AdamOptimizer** (많은 연구자들이 사용)

자세한 사항은 [TensorFlow API_guides: Training](https://www.tensorflow.org/api_guides/python/train) 참조!

In [None]:
# Adam optimizer.
# NOTE(review): learning_rate=0.01 is ten times the TensorFlow default (0.001) -- confirm this is intended.
optimizer = tf.train.AdamOptimizer(learning_rate=0.01)

In [None]:
# Op that performs one optimization step on the cost when run in a session.
train_op = optimizer.minimize(loss=cost)

### 2.3. Predicting operator
- correct_prediction: boolean (True or False)
- accuracy: 먼저 correct_prediction을 float32로 변환 후에 배치 내 평균을 계산

**tf.nn.in_top_k(x, y, k)**
- tf.nn.in_top_k(x, y, k)는 prediction x의 상위 k개의 결과가 true label y를 포함하는지를 계산
- 이에 대한 output은 boolean 으로 나오므로, 이를 0, 1로 바꿔주기 위해서 tf.cast를 이용하여 float32로 변환한 이후에 accuracy를 계산한다.

In [None]:
# Boolean per example: True when the true label is the top-1 scoring class.
correct_prediction = tf.nn.in_top_k(predictions=logits, targets=Y, k=1)

In [None]:
# Convert the booleans to 0.0 / 1.0 and average them to get the batch accuracy.
accuracy = tf.reduce_mean(
    input_tensor=tf.cast(correct_prediction, dtype=tf.float32)
)

In [None]:
# Show the symbolic tensors for the per-example correctness flags and the mean accuracy.
print(correct_prediction)
print(accuracy)

### 2.4. Initializer

In [None]:
# Op that initializes all global variables; it is run once at the start of the session below.
init = tf.global_variables_initializer()

### 2.5. Run the session
- 앞서 만든 graph, operator 등을 돌리는 과정

In [None]:
# Training hyper-parameters: passes over the training set, and examples per mini-batch.
NUM_EPOCHS, BATCH_SIZE = 30, 100

In [None]:
# Build the [start, end) index pair of every mini-batch.
# Each end index is clamped to num_trn so that a trailing batch smaller than
# BATCH_SIZE is still listed when num_trn is not a multiple of BATCH_SIZE
# (zipping two independent ranges would silently drop it).
start_idx = list(range(0, num_trn, BATCH_SIZE))
end_idx = [min(start + BATCH_SIZE, num_trn) for start in start_idx]
for start, end in zip(start_idx, end_idx):
    print(start, '\t', end)

In [None]:
# Number of batches (one per start offset)
print(len(start_idx))

In [None]:
# Per-epoch training cost, plus the validation cost / accuracy recorded at each
# validation step; filled in by the training loop.
trn_cost_list = []
val_cost_list = []
val_accuracy_list = []

In [None]:
# Run training, periodic validation and the final test inside one session.
# NOTE(review): tf.device only applies to ops created inside its context; the
# graph was built before this point, so it likely has no effect here -- confirm.
with tf.Session() as sess, tf.device("/cpu:0"):
    # Variable initialization
    sess.run(init)

    # Indices for constructing batches. The end index is clamped to num_trn so
    # that a final batch smaller than BATCH_SIZE is still trained on when
    # num_trn is not a multiple of BATCH_SIZE.
    start_idx = range(0, num_trn, BATCH_SIZE)
    end_idx = [min(start + BATCH_SIZE, num_trn) for start in start_idx]

    # Kept for reference / later cells; now equals the number of batches actually run.
    NUM_BATCHES = len(start_idx)

    for epoch in range(NUM_EPOCHS):

        # Running sum of per-example costs for this epoch
        trn_cost = 0.0

        # Training phase
        for start, end in zip(start_idx, end_idx):

            # Construct the input batch
            batch_xs = X_trn[start:end]
            batch_ys = Y_trn[start:end]

            # Run one optimization step and fetch the mean cost of this batch
            tmp_cost, _ = sess.run([cost, train_op], feed_dict={X: batch_xs, Y: batch_ys})

            # cost is a per-batch mean, so weight it by the batch size; this
            # keeps the epoch average exact even if the last batch is smaller.
            trn_cost += float(tmp_cost) * (end - start)

        # Average cost per training example over the whole epoch
        trn_cost = trn_cost / num_trn
        trn_cost_list.append(trn_cost)
        print("[{} epoch] training cost {:0.4f}".format((epoch + 1), trn_cost))

        # Validation phase (every 10th epoch, on the full validation set)
        if (epoch + 1) % 10 == 0:
            val_cost, val_accuracy = sess.run([cost, accuracy], feed_dict={X: X_val, Y: Y_val})
            val_cost_list.append(val_cost)
            val_accuracy_list.append(val_accuracy)
            print("\t[{} epoch] validation accuracy {:0.4f}".format((epoch + 1), val_accuracy))

    # Test phase (after the last epoch, on the full test set)
    test_accuracy = sess.run(accuracy, feed_dict={X: X_test, Y: Y_test})
    print("\n")
    print("Test accuracy: {:0.4f}".format(test_accuracy))