In [None]:
# tf.keras.layers.BatchNormalization()

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist
import os

In [2]:
def normalize(train_data, test_data):
    """Rescale two image arrays to float32 by dividing every value by 255.

    Args:
        train_data: NumPy array of training images.
        test_data: NumPy array of test images.

    Returns:
        Tuple ``(train_data, test_data)`` of new float32 arrays; the inputs
        are not modified.
    """
    scale = 255.0
    scaled_train, scaled_test = (
        split.astype(np.float32) / scale for split in (train_data, test_data)
    )
    return scaled_train, scaled_test

In [3]:
def load_mnist():
    """Load MNIST and prepare it for the network defined in this notebook.

    Returns:
        Tuple ``(train_data, train_labels, test_data, test_labels)`` where the
        images have a trailing channel axis and have been passed through
        ``normalize``, and the labels are one-hot encoded over 10 classes.
    """
    (train_images, train_digits), (test_images, test_digits) = mnist.load_data()

    # TensorFlow expects image input shaped [batch_size, height, width, channel].
    # MNIST is grayscale, so the single channel is omitted and the arrays are
    # rank 3; append the missing channel axis at the end (axis=-1, which is
    # the same as axis=3 here).
    train_images = train_images[..., np.newaxis]  # [N,28,28] -> [N,28,28,1]
    test_images = test_images[..., np.newaxis]    # [N,28,28] -> [N,28,28,1]

    # [0~255] -> [0~1]
    train_images, test_images = normalize(train_images, test_images)

    # One-hot encoding; 10 is the number of distinct labels.
    train_onehot = to_categorical(train_digits, 10)  # [N,] -> [N,10]
    test_onehot = to_categorical(test_digits, 10)    # [N,] -> [N,10]

    return train_images, train_onehot, test_images, test_onehot

In [4]:
# Flatten the input shape
def flatten():
    """Return a new ``tf.keras.layers.Flatten`` layer."""
    flatten_layer = tf.keras.layers.Flatten()
    return flatten_layer

# Fully connected layer
def dense(channel, weight_init):
    """Return a new ``tf.keras.layers.Dense`` layer with a bias term.

    Args:
        channel: Number of output units of the layer (passed as ``units``).
        weight_init: Weight initializer (passed as ``kernel_initializer``).
    """
    layer_config = dict(
        units=channel,                   # how many output channels the layer emits
        use_bias=True,                   # whether to use a bias
        kernel_initializer=weight_init,  # weight initializer
    )
    return tf.keras.layers.Dense(**layer_config)

def relu():
    """Return a new Keras ``Activation`` layer that applies ReLU."""
    relu_fn = tf.keras.activations.relu
    return tf.keras.layers.Activation(relu_fn)

def batch_norm():
    """Return a new ``tf.keras.layers.BatchNormalization`` layer with default settings."""
    norm_layer = tf.keras.layers.BatchNormalization()
    return norm_layer

In [5]:
class create_model(tf.keras.Model):
    """Fully connected classifier with batch normalization.

    Layer stack: Flatten -> 2 x (Dense(256) -> BatchNorm -> ReLU) -> Dense(label_dim).
    The final Dense layer has no activation, so the model returns logits.
    """

    def __init__(self, label_dim):
        """Build the layer stack.

        Args:
            label_dim: Number of outputs produced by the final layer.
        """
        super().__init__()
        weight_init = tf.keras.initializers.he_uniform()  # He initialization
        # Alternative: tf.keras.initializers.glorot_uniform()  # Xavier

        # Sequential stacks layers one after another (like appending to a list).
        self.model = tf.keras.Sequential()

        # [N,28,28,1] -> [N,784]; required because the following layers are fully connected.
        self.model.add(flatten())

        # Two hidden blocks: [N,784] -> [N,256] -> [N,256]
        for _ in range(2):
            # The usual order is layer, normalization, activation.
            # (normalization, activation, layer is also used sometimes.)
            self.model.add(dense(256, weight_init))
            self.model.add(batch_norm())
            self.model.add(relu())

        self.model.add(dense(label_dim, weight_init))  # [N,256] -> [N,10]

    def call(self, x, training=None, mask=None):
        """Run the forward pass and return the logits.

        Args:
            x: Input batch.
            training: Forwarded to the inner Sequential model so that its
                BatchNormalization layers receive the mode the caller asked
                for explicitly instead of relying on implicit propagation.
            mask: Accepted for interface compatibility; not used.
        """
        return self.model(x, training=training)

In [6]:
def loss_fn(model, images, labels):
    """Return the mean categorical cross-entropy of ``model`` on one batch.

    Args:
        model: Callable model that returns logits.
        images: Input batch.
        labels: One-hot labels for the batch.

    Returns:
        Scalar tensor with the batch-averaged loss.
    """
    # training=True: run the forward pass in training mode (the model built in
    # this notebook contains batch-normalization layers).
    logits = model(images, training=True)
    per_example_loss = tf.keras.losses.categorical_crossentropy(
        y_true=labels, y_pred=logits, from_logits=True
    )
    return tf.reduce_mean(per_example_loss)

def accuracy_fn(model, images, labels):
    """Return the fraction of examples in the batch that ``model`` classifies correctly.

    Args:
        model: Callable model that returns logits.
        images: Input batch.
        labels: One-hot labels for the batch.

    Returns:
        Scalar float32 tensor with the batch accuracy.
    """
    # training=False: run the forward pass in inference mode.
    logits = model(images, training=False)

    # logits and labels are shaped [batch_size, label_dim] (label_dim is 10 here);
    # argmax over the last axis gives the position of the largest value.
    predicted_class = tf.argmax(logits, -1)
    true_class = tf.argmax(labels, -1)
    is_correct = tf.equal(predicted_class, true_class)  # bool

    # tf.cast: bool -> number, so the mean is the share of correct predictions.
    return tf.reduce_mean(tf.cast(is_correct, tf.float32))

def grad(model, images, labels):
    """Return the gradients of ``loss_fn`` with respect to ``model.variables``.

    Args:
        model: Model whose ``variables`` are differentiated.
        images: Input batch.
        labels: One-hot labels for the batch.

    Returns:
        The result of ``tape.gradient`` for ``model.variables``.
    """
    # NOTE(review): model.variables may include non-trainable variables, for
    # which the tape presumably yields None — confirm against the caller.
    with tf.GradientTape() as tape:
        batch_loss = loss_fn(model, images, labels)
    gradients = tape.gradient(batch_loss, model.variables)
    return gradients

In [7]:
# Data: images and one-hot labels for the train and test splits.
train_x, train_y, test_x, test_y = load_mnist()

# Optimisation hyper-parameters.
learning_rate, batch_size = 0.001, 128

# Number of outputs of the final layer.
label_dim = 10

# Training schedule: iterations per epoch is the number of full batches.
training_epochs = 1
training_iterations = len(train_x) // batch_size

In [8]:
# Input pipelines: shuffle -> batch -> repeat -> prefetch.
# prefetch comes last so that it buffers finished batches (one batch ahead)
# instead of individual examples.

# shuffle(buffer_size=100000): train_x / train_y hold 60,000 examples, so any
# buffer size larger than that is enough.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_x, train_y))
    .shuffle(buffer_size=100000)
    .batch(batch_size)
    .repeat()
    .prefetch(buffer_size=1)
)

# The test pipeline yields the whole test split as a single batch.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_x, test_y))
    .shuffle(buffer_size=100000)
    .batch(len(test_x))
    .repeat()
    .prefetch(buffer_size=1)
)

In [11]:
# Iterating a tf.data.Dataset directly replaces the deprecated
# tf.compat.v1.data.make_one_shot_iterator; the returned iterator still
# provides get_next(), which the training loop calls.
train_iterator = iter(train_dataset)
test_iterator = iter(test_dataset)

# Model and optimizer used by the training loop.
network = create_model(label_dim)

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)


In [9]:
""" Writer """
# Root folders for checkpoints and logs.
checkpoint_dir, logs_dir = 'checkpoints', 'logs'

# Sub-folder / file-name stem identifying this model.
model_dir = 'nn_relu'

def check_folder(dir):
    """Create ``dir`` (including missing parents) if it does not exist.

    Args:
        dir: Path of the folder to ensure.

    Returns:
        ``dir`` unchanged, so the call can be used inline.
    """
    if not os.path.exists(dir):
        # exist_ok=True: do not fail if the folder appears between the
        # existence check above and this call.
        os.makedirs(dir, exist_ok=True)
    return dir

# Per-model checkpoint folder; check_folder creates it if missing and returns the path.
checkpoint_dir = check_folder(os.path.join(checkpoint_dir, model_dir))
# Checkpoint files are written inside that folder using the model name as prefix.
checkpoint_prefix = os.path.join(checkpoint_dir, model_dir)
# Per-model log folder path (only the path is built here).
logs_dir = os.path.join(logs_dir, model_dir)

In [12]:
train_flag = True

start_epoch = 0
start_iteration = 0
counter = 0  # number of optimisation steps performed

# train phase
checkpoint = tf.train.Checkpoint(dnn=network)
global_step = tf.compat.v1.train.get_or_create_global_step()
# tf.train.create_global_step()
# Records which iteration each set of weights belongs to.

for epoch in range(start_epoch, training_epochs):
    for idx in range(start_iteration, training_iterations):
        # Fetch one batch of images and labels.
        train_input, train_label = train_iterator.get_next()

        grads = grad(network, train_input, train_label)
        # grad() differentiates with respect to network.variables, which also
        # holds non-trainable variables (e.g. the BatchNormalization moving
        # statistics). The tape returns None for those, so drop such pairs
        # instead of handing None gradients to the optimizer.
        grads_and_vars = [
            (g, v) for g, v in zip(grads, network.variables) if g is not None
        ]
        optimizer.apply_gradients(grads_and_vars=grads_and_vars)
                                  # global_step=global_step)

        # Metrics on the current training batch, measured after the update.
        train_loss = loss_fn(network, train_input, train_label)
        train_accuracy = accuracy_fn(network, train_input, train_label)

        # The test pipeline yields the full test split as one batch.
        test_input, test_label = test_iterator.get_next()
        test_accuracy = accuracy_fn(network, test_input, test_label)

        print("""Epoch: [%2d] [%5d/%5d],
              train_loss: %.8f,
              train_accuracy: %.4f,
              test_Accuracy: %.4f """\
                % (epoch, idx, training_iterations,
                train_loss, train_accuracy,test_accuracy))
        counter += 1
# Save the final weights; the step count is appended to the file prefix.
checkpoint.save(file_prefix=checkpoint_prefix + '-{}'.format(counter))

Epoch: [ 0] [    0/  468],
              train_loss: 1.99718428,
              train_accuracy: 0.3516,
              test_Accuracy: 0.1977 
Epoch: [ 0] [    1/  468],
              train_loss: 1.96930146,
              train_accuracy: 0.3828,
              test_Accuracy: 0.3642 
Epoch: [ 0] [    2/  468],
              train_loss: 1.76580381,
              train_accuracy: 0.5156,
              test_Accuracy: 0.4237 
Epoch: [ 0] [    3/  468],
              train_loss: 1.81092644,
              train_accuracy: 0.5078,
              test_Accuracy: 0.4887 
Epoch: [ 0] [    4/  468],
              train_loss: 1.55865240,
              train_accuracy: 0.6406,
              test_Accuracy: 0.5536 
Epoch: [ 0] [    5/  468],
              train_loss: 1.53645110,
              train_accuracy: 0.6250,
              test_Accuracy: 0.6211 
Epoch: [ 0] [    6/  468],
              train_loss: 1.36154962,
              train_accuracy: 0.6641,
              test_Accuracy: 0.6867 
Epoch: [ 0] [    7/ 

'checkpoints\\nn_relu\\nn_relu-468-1'