# 1. 데이터 전처리 (Data preprocessing)

### 1. 1. 데이터 로드하기

In [None]:
import numpy as np

# Open the .npz archive; later cells read its 'data' and 'label' entries.
npz_path = '/content/data(224).npz'
data = np.load(npz_path)

### 1. 2. 데이터 섞기

In [4]:
# Draw a random ordering of the sample indices. The same ordering is used to
# index both the images and the labels, which keeps each pair aligned.
num_samples = data['data'].shape[0]
shuffled_index = np.random.permutation(num_samples)

### 1. 3. 데이터 나누기

In [None]:
# Reorder images and labels with the shared shuffled index, then cut the
# result into fixed-size train / validation / test partitions.
x_data = data['data'][shuffled_index]
y_data = data['label'][shuffled_index]

train_rows = slice(None, 600)   # first 600 samples
valid_rows = slice(600, 800)    # next 200 samples
test_rows = slice(800, None)    # everything that is left

x_train, y_train = x_data[train_rows], y_data[train_rows]
x_valid, y_valid = x_data[valid_rows], y_data[valid_rows]
x_test, y_test = x_data[test_rows], y_data[test_rows]

print(x_train.shape, x_valid.shape, x_test.shape)

(600, 224, 224, 1) (200, 224, 224, 1) (200, 224, 224, 1)


### 1. 4. 데이터 증강

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

# Random geometric augmentation: rotation, horizontal/vertical shift, shear
# and zoom; pixels exposed by a transform are filled from the nearest edge.
data_gen = ImageDataGenerator(
    rotation_range=18,
    width_shift_range=0.14,
    height_shift_range=0.14,
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',
)

x_train_augmentated = []
y_train_augmentated = []

# The generator never stops on its own, so count the passes down by hand.
# Each batch is as large as the whole training split, i.e. one batch is one
# augmented copy of the training set.
augmentation_num = 15
for images, labels in data_gen.flow(x_train, y_train, batch_size=x_train.shape[0]):
    x_train_augmentated.extend(images)
    y_train_augmentated.extend(labels)
    augmentation_num -= 1
    if augmentation_num > 0:
        print(f'{augmentation_num} steps remaining')
        continue
    break

# Stack the collected samples back into arrays.
x_train_augmentated = np.array(x_train_augmentated)
y_train_augmentated = np.array(y_train_augmentated)

print(x_train_augmentated.shape)
# NOTE(review): this prints the original labels, not y_train_augmentated -
# confirm that is intended.
print(y_train)

### 1. 5. 데이터 정규화

In [None]:
# Standardize every split with statistics computed on the augmented training
# set only.
# axis=0 averages across samples, giving one mean per pixel position
# (axis=(1, 2) would instead average inside each individual image).
mean_vals = x_train_augmentated.mean(axis=0)
# A single global std is used on purpose: a per-pixel std (axis=0) is 0 for any
# pixel that has the same value in every sample, and dividing by it would be a
# division by zero.
std_val = x_train_augmentated.std()

x_train_augmentated_centered, x_valid_centered, x_test_centered = (
    (split - mean_vals) / std_val
    for split in (x_train_augmentated, x_valid, x_test)
)

# Sanity check on the transformed training data.
print('평균: ', x_train_augmentated_centered.mean())
print('표준편차', x_train_augmentated_centered.std())

# 2. 모델 구현 (Model implementation)

### 2. 1. 모델 구현

In [None]:
# model structure
from tensorflow.keras import layers, models

def create_model(num_classes=4):
    """Build the (uncompiled) CNN classifier for 224x224 single-channel images.

    The feature extractor is five blocks of a 3x3 'same'-padded ReLU
    convolution followed by 2x2 max pooling, with 32, 64, 128, 256 and 512
    filters. The five poolings reduce 224x224 to 7x7, so the 7x7 average
    pooling that follows leaves a single value per channel. That is flattened
    and fed through two Dropout(0.5) + Dense(128, relu) stages and a softmax
    output layer.

    Args:
        num_classes: Number of units in the softmax output layer. Defaults
            to 4, the value that was previously hard-coded.

    Returns:
        A `Sequential` model whose output has shape (batch, num_classes).
    """
    model = models.Sequential()

    # Declare the input once. Only the first layer of a Sequential model
    # defines the input shape; `input_shape` on later layers is ignored.
    model.add(layers.Input(shape=(224, 224, 1)))

    # feature extractor: block1 .. block5
    for block_no, filters in enumerate((32, 64, 128, 256, 512), start=1):
        model.add(layers.Conv2D(filters, (3, 3), padding='same', activation='relu',
                                name=f'block{block_no}_conv1'))
        model.add(layers.MaxPool2D((2, 2), name=f'block{block_no}_pool'))

    # classifier
    model.add(layers.AveragePooling2D((7, 7)))
    # Flatten before the dense head so the Dense layers work on plain
    # (batch, features) tensors instead of (batch, 1, 1, features).
    model.add(layers.Flatten())

    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(128, activation='relu'))

    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(128, activation='relu'))

    model.add(layers.Dense(num_classes, activation='softmax'))

    return model

# Build the network and configure training: Adam optimizer, categorical
# cross-entropy loss, accuracy reported under the name 'acc'.
# NOTE(review): categorical cross-entropy assumes one-hot encoded labels -
# confirm against the 'label' array in the dataset.
model = create_model()
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

### 2. 2. 모델 설명

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

# 3. 모델 훈련 (Model training)

### 3. 1. 텐서보드 활용하기

In [None]:
# Load the TensorBoard notebook extension and open it on /content/logs,
# the parent directory that the training cell writes its run logs into.
# Optional install step, kept disabled:
# !pip install jupyter-tensorboard
%load_ext tensorboard
%tensorboard --logdir /content/logs

### 3. 2. 모델 훈련하기

In [None]:
import time
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard

# Give every run its own TensorBoard sub-directory, named with a compact,
# sortable timestamp. time.asctime() would put spaces and colons into the
# path, which is awkward in shells and not a valid directory name on every
# platform.
run_log_dir = "/content/logs/{}".format(time.strftime("%Y%m%d-%H%M%S"))

callback_list = [
    # Keep only the checkpoint with the lowest validation loss seen so far.
    ModelCheckpoint(filepath='ct_244_cnn_cp.keras', monitor='val_loss', save_best_only=True),
    TensorBoard(log_dir=run_log_dir),
]

# Train on the augmented, standardized training set and validate on the
# standardized validation split after every epoch.
history = model.fit(
    x_train_augmentated_centered,
    y_train_augmentated,
    batch_size=32,
    epochs=50,
    validation_data=(x_valid_centered, y_valid),
    callbacks=callback_list,
)

# 4. 모델 평가 (Model evaluation)

### 4. 1. 모델 평가하기

In [None]:
# Score the in-memory model on the test split, then on the validation split.
# NOTE(review): this evaluates `model` as left by the last training epoch, not
# the best checkpoint saved to 'ct_244_cnn_cp.keras' - confirm that is intended.
model.evaluate(x_test_centered, y_test)
model.evaluate(x_valid_centered, y_valid)

# Time one full prediction pass over the test split and report the total
# wall-clock time as well as the average per image.
t0 = time.time()
model.predict(x_test_centered)
t1 = time.time()

elapsed = t1 - t0
num_test_images = x_test_centered.shape[0]
print(f"tot: {elapsed}s, {elapsed / num_test_images}s/img")