# v2
    使用 Data augmentation 生成資料，增加普適性 (泛化性)

In [1]:
!pip install efficientnet

Collecting efficientnet
  Downloading https://files.pythonhosted.org/packages/28/91/67848a143b54c331605bfba5fd31cf4e9db13d2e429d103fe807acc3bcf4/efficientnet-1.1.0-py3-none-any.whl
Installing collected packages: efficientnet
Successfully installed efficientnet-1.1.0


In [2]:
!nvidia-smi

Mon Jun 22 16:33:29 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.36.06    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
%tensorflow_version 2.x
import zipfile
import os
import csv
import numpy as np
import shutil
from tqdm.auto import tqdm
import matplotlib.pyplot as plt

# 載入資料

In [None]:
IMAGE_SIZE = 224
BATCH_SIZE = 10
STEPS = 64
NUM_EPOCHS = 40
NUM_CLASSES = 3
DATASET_PATH = '/content/C1-P1_Train_Dev/C1-P1/'       # 資料路徑

In [None]:
def image_class():
    train_test_split = 0.2

    f = zipfile.ZipFile("/content/drive/My Drive/Colab Notebooks/AIMango_sample/C1-P1_Train_Dev.zip")
    f.extractall("./")

    # 依分類建立資料夾
    if not os.path.isdir(DATASET_PATH + 'train'):
        os.mkdir(DATASET_PATH + 'train')
    if not os.path.isdir(DATASET_PATH + 'test'):
        os.mkdir(DATASET_PATH + 'test')
    if not os.path.isdir(DATASET_PATH + 'train/A'):
        os.mkdir(DATASET_PATH + 'train/A')
    if not os.path.isdir(DATASET_PATH + 'train/B'):
        os.mkdir(DATASET_PATH + 'train/B')
    if not os.path.isdir(DATASET_PATH + 'train/C'):
        os.mkdir(DATASET_PATH + 'train/C')
    if not os.path.isdir(DATASET_PATH + 'test/A'):
        os.mkdir(DATASET_PATH + 'test/A')
    if not os.path.isdir(DATASET_PATH + 'test/B'):
        os.mkdir(DATASET_PATH + 'test/B')
    if not os.path.isdir(DATASET_PATH + 'test/C'):
        os.mkdir(DATASET_PATH + 'test/C')
    
    with open('/content/C1-P1_Train_Dev/C1-P1.csv') as csvfile:
        rows = csv.reader(csvfile)

        level_path = {'A': [], 'B': [], 'C': []}
        for row in rows:
            level_path[row[1][-1]].append(DATASET_PATH + row[0])

        for lvl in level_path:            
            count = int(len(level_path[lvl]) * (1 - train_test_split))
            
            for i, img_path in enumerate(tqdm(level_path[lvl])):
                if i < count:
                    shutil.move(img_path, DATASET_PATH + 'train/' + lvl)
                else:
                    shutil.move(img_path, DATASET_PATH + 'test/' + lvl)

if not os.path.exists('/content/C1-P1_Train_Dev'):
    image_class()

# 資料增強 ImageDataGenerator

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

trn_dagen = ImageDataGenerator(rescale= 1. / 255,
                               rotation_range=40,
                               width_shift_range=0.2,
                               height_shift_range=0.2,
                               shear_range=0.2,
                               zoom_range=0.2,
                               channel_shift_range=10,
                               horizontal_flip=True,
                               fill_mode='nearest',
                               validation_split=0.2,
                               dtype='float32')

val_dagen = ImageDataGenerator(rescale= 1./ 255,
                               validation_split=0.2,
                               dtype='float32')

trn_gen = trn_dagen.flow_from_directory(DATASET_PATH + 'train',
                                        target_size=(IMAGE_SIZE, IMAGE_SIZE),
                                        batch_size=BATCH_SIZE,
                                        class_mode='categorical',
                                        shuffle=True,
                                        subset='training')

val_gen = val_dagen.flow_from_directory(DATASET_PATH + 'train',
                                        target_size=(IMAGE_SIZE, IMAGE_SIZE),
                                        batch_size=BATCH_SIZE,
                                        class_mode='categorical',
                                        shuffle=True,
                                        subset='validation')

Found 5119 images belonging to 3 classes.
Found 1022 images belonging to 3 classes.


# CNN、FC 模型建立

In [None]:
import efficientnet.tfkeras as efn
from tensorflow.keras import Model
import tensorflow.keras.layers as L

net = efn.EfficientNetB7(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
                         weights='noisy-student',
                         include_top=False)

x = net.output
x = L.Flatten()(x)
x = L.Dropout(0.5)(x)
x = L.Dense(NUM_CLASSES, activation='softmax')(x)

model = Model(inputs=net.input, outputs=x)

# 模型編譯 optimizer、loss、metrics

In [None]:
from tensorflow.keras.optimizers import Adam

model.compile(optimizer=Adam(),
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])

# 模型訓練

In [None]:
history = model.fit(trn_gen,
                    steps_per_epoch=trn_gen.samples // 128,
                    epochs=NUM_EPOCHS, 
                    verbose=1,
                    validation_data=val_gen,
                    validation_steps=val_gen.samples // 128)

Epoch 1/40
 6/39 [===>..........................] - ETA: 13:11 - loss: 6.1278 - categorical_accuracy: 0.3833

KeyboardInterrupt: ignored

# Plot accuracy

In [None]:
plt.plot(history.history['categorical_accuracy'])
plt.plot(history.history['val_categorical_accuracy'])
plt.legend(['acc', 'val_acc'])
plt.title('acc')

# Plot loss 

In [None]:
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.legend(['loss', 'val_loss'])
plt.title('loss')

# 評估模型

In [None]:
gobj = ImageDataGenerator(rescale=1./255, dtype='float32')

eva_gen = gobj.flow_from_directory(DATASET_PATH + 'test',
                                   target_size=(IMAGE_SIZE, IMAGE_SIZE),
                                   batch_size=BATCH_SIZE,
                                   class_mode='categorical')

loss, acc = model.evaluate(eva_gen, steps=STEPS)
print('評估的準確率: ', acc)

In [None]:
pre_gen = gobj.flow_from_directory(DATASET_PATH + 'test',
                                   target_size=(IMAGE_SIZE, IMAGE_SIZE),
                                   batch_size=10,
                                   class_mode=None)
ans = model.predict(pre_gen, steps=1)
print('預測的結果: ', ans.round(1))