# Convolutional neural network (version 2)
- 미리 만들어진 모델을 불러와서 학습을 해봅시다.
- 모델 정의는 mypackage/vgg_slim.py 참고 (아래에서 `from mypackage.vgg_slim import vgg`로 불러옴)

In [None]:
import random
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from pprint import pprint
from tensorflow.examples.tutorials.mnist import input_data

from mypackage.vgg_slim import vgg

## 0. Load MNIST data

In [None]:
# Load MNIST from MNIST_data/ with integer class labels (one_hot=False).
mnist = input_data.read_data_sets("MNIST_data/", one_hot=False)

# Pull the image and label arrays out of each split.
X_trn = mnist.train.images
Y_trn = mnist.train.labels
X_val = mnist.validation.images
Y_val = mnist.validation.labels
X_test = mnist.test.images
Y_test = mnist.test.labels

In [None]:
# Peek at the first five training labels to confirm they are class indices.
print(Y_trn[:5])

In [None]:
# Number of examples in each split, taken from the length of its label array.
num_trn, num_val, num_test = (len(labels) for labels in (Y_trn, Y_val, Y_test))

print("Number of training points: ", num_trn)
print("Number of validation points: ", num_val)
print("Number of test points: ", num_test)

In [None]:
# Size of the second axis of the image array (features per image).
dim_X = X_trn.shape[1]

# np.sqrt returns a float, so convert the side length to int.
pixel_X = int(np.sqrt(dim_X))

print("Dimension of X: %d (%d x %d)" % (dim_X, pixel_X, pixel_X))

# The labels were loaded with one_hot=False, so Y has no second axis to report.
print("Dimension of Y: None.. Y is a array of integers.")

In [None]:
# Reshape every split to (N, 28, 28, 1), matching the 4-D input placeholder.
X_trn, X_val, X_test = (
    images.reshape(-1, 28, 28, 1) for images in (X_trn, X_val, X_test)
)

In [None]:
# Display the training image shape after the reshape above; expected (num_trn, 28, 28, 1).
X_trn.shape

## 1. Build the graph
Tensorflow에서는 모델을 'graph'로 구현한다.

### 1.1. Placeholder for inputs and outputs
- Shape of the placeholder for inputs: [batch_size, height, width, channels] (여기서는 [None, 28, 28, 1])
- Shape of the placeholder for outputs: [batch_size]
- Placeholder의 batch_size를 None으로 두면 크기가 고정되지 않으므로, feed할 때 임의의 batch size를 넣을 수 있음

In [None]:
# Image batch: the leading None leaves the batch size open until feed time.
X = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1], name="Inputs")

# One integer class index per image in the batch.
Y = tf.placeholder(dtype=tf.int32, shape=[None], name="Labels")

## pre-designed model

In [None]:
# Build the pre-designed network on the input placeholder for the 10 digit classes.
# NOTE(review): end_points is presumably a collection of intermediate layer
# outputs — confirm against the vgg implementation.
logits, end_points = vgg(inputs=X, num_classes=10)
print(logits)

In [None]:
# Pretty-print the end points returned by vgg() alongside the logits.
pprint(end_points)

## 2. Training

### 2.1. Loss function
- Classification 문제에서 제일 많이 사용하는 loss function은 **cross-entropy**

두 가지 옵션이 있음.
- tf.nn.softmax_cross_entropy_with_logits: Y가 one-hot encoded 되어 있을 때
- tf.nn.sparse_softmax_cross_entropy_with_logits: Y가 class에 대한 index 값일 때

### cost function

In [None]:
# Labels are class indices (not one-hot), so the sparse cross-entropy variant is used;
# the scalar cost is the mean of that loss over the batch.
cost = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y, logits=logits)
)

### 2.2. Training operator
- First, define the optimizer. (**optimizer**)
- And then, define training operator. (**train_op**)

자주 사용하는 optimizer로는 다음과 같은 것들이 있음.
- tf.train.GradientDescentOptimizer
- tf.train.AdagradOptimizer
- tf.train.MomentumOptimizer
- **tf.train.AdamOptimizer** (많은 연구자들이 사용)

자세한 사항은 [TensorFlow API_guides: Training](https://www.tensorflow.org/api_guides/python/train) 참조!

### optimizer

In [None]:
# Adam optimizer with a fixed learning rate of 0.01.
optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
# Training op: minimizes `cost`; it is run once per batch in the training loop.
train_op = optimizer.minimize(cost)

### 2.3. Predicting operator
- correct_prediction: boolean (True or False)
- accuracy: 먼저 correct_prediction을 float32로 변환 후에 배치 내 평균을 계산

**tf.nn.in_top_k(x, y, k)**
- tf.nn.in_top_k(x, y, k)는 prediction x의 상위 k개의 결과가 true label y를 포함하는지를 계산
- 이에 대한 output은 boolean 으로 나오므로, 이를 0, 1로 바꿔주기 위해서 tf.cast를 이용하여 float32로 변환한 이후에 accuracy를 계산한다.

In [None]:
# Boolean per example: is the true label the top-1 entry of the logits?
correct_prediction = tf.nn.in_top_k(predictions=logits, targets=Y, k=1)

### Accuracy

In [None]:
# correct_prediction is boolean; cast it to float32 and average to get the
# fraction of correct predictions in the fed batch.
print(type(correct_prediction))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Inspect the newly created accuracy tensor rather than correct_prediction again.
print(type(accuracy))

In [None]:
# Show both graph tensors, one per line.
print(correct_prediction, accuracy, sep="\n")

### 2.4. Initializer

In [None]:
# Op that initializes all global variables; it is run first thing inside the training session.
init = tf.global_variables_initializer()

### 2.5. Run the session
- 앞서 만든 graph, operator 등을 돌리는 과정

In [None]:
# Training hyperparameters: passes over the training set, and examples per mini-batch.
NUM_EPOCHS, BATCH_SIZE = 30, 128

In [None]:
# Batch 인덱스 생성
start_idx = range(0, num_trn, BATCH_SIZE)
end_idx = range(BATCH_SIZE, num_trn + 1, BATCH_SIZE)
for start, end in zip(start_idx, end_idx): print(start, '\t', end)

In [None]:
# Histories filled in by the loop below: the mean training cost is recorded
# every epoch; validation cost and accuracy only on epochs where validation runs.
trn_cost_list = []
val_cost_list = []
val_accuracy_list = []

# NOTE(review): tf.device only affects ops created inside its context, and no ops
# are created below (the graph was built in earlier cells), so this probably does
# not change device placement — confirm before relying on it.
with tf.Session() as sess, tf.device("/cpu:0"):
    # Variable initialization
    sess.run(init)

    # Batch boundaries. Each end index is clamped to num_trn so that the final,
    # shorter batch is trained on as well; two plain ranges zipped together
    # silently drop it whenever num_trn is not a multiple of BATCH_SIZE.
    start_idx = range(0, num_trn, BATCH_SIZE)
    end_idx = [min(start + BATCH_SIZE, num_trn) for start in start_idx]

    # Batches per epoch; now equal to the number of batches actually run.
    # Not used below; kept as a notebook-level name.
    NUM_BATCHES = len(start_idx)

    for epoch in range(NUM_EPOCHS):

        # Running sum of per-example costs for this epoch
        trn_cost = 0.0

        # Training phase
        for start, end in zip(start_idx, end_idx):

            # Construct the input batch
            batch_xs = X_trn[start:end]
            batch_ys = Y_trn[start:end]

            # One optimization step; `cost` is the mean over the fed batch
            tmp_cost, _ = sess.run([cost, train_op], feed_dict={X: batch_xs, Y: batch_ys})

            # Weight by batch size so a short final batch is not over-counted
            trn_cost += tmp_cost * (end - start)

        # Mean cost per training example over the whole epoch
        trn_cost = trn_cost / num_trn
        trn_cost_list.append(trn_cost)
        print("[{} epoch] training cost {:0.4f}".format((epoch + 1), trn_cost))

        # Validation phase: every 10th epoch, on the full validation set in one run
        if (epoch + 1) % 10 == 0:
            val_cost, val_accuracy = sess.run([cost, accuracy], feed_dict={X: X_val, Y: Y_val})
            val_cost_list.append(val_cost)
            val_accuracy_list.append(val_accuracy)
            print("\t[{} epoch] validation accuracy {:0.4f}".format((epoch + 1), val_accuracy))

    # Test phase: evaluated once after the final epoch, inside the same session
    test_accuracy = sess.run(accuracy, feed_dict={X: X_test, Y: Y_test})
    print("\n")
    print("Test accuracy: {:0.4f}".format(test_accuracy))

##  Cost plot

In [None]:
# Training cost is recorded once per epoch; number the epochs from 1 to match
# the training log. Sizing the axis from the list avoids a length mismatch.
trn_epochs = np.arange(1, len(trn_cost_list) + 1)
plt.plot(trn_epochs, trn_cost_list, label="train loss")

# No test cost is collected anywhere in this notebook; the validation cost is.
# It is recorded only on every 10th epoch (the `(epoch + 1) % 10 == 0` check in
# the training cell), so it needs its own x-axis.
val_every = 10
val_epochs = val_every * np.arange(1, len(val_cost_list) + 1)
plt.plot(val_epochs, val_cost_list, marker="o", label="validation loss")

plt.title("cross entropy loss")
plt.legend()
plt.xlabel("epoch")
plt.ylabel("cross entropy")

In [None]:
# Validation accuracy is recorded only on every 10th epoch (the
# `(epoch + 1) % 10 == 0` check in the training cell), so build the x-axis from
# the number of recorded points instead of a fixed range.
val_every = 10
val_epochs = val_every * np.arange(1, len(val_accuracy_list) + 1)
plt.plot(val_epochs, val_accuracy_list, marker="o")
plt.title("prediction accuracy")
plt.xlabel("epoch")
plt.ylabel("accuracy")