In [1]:
# 設定超參數HyperParameters 
batch_size =  128
epochs = 20
image_size = 128
lr=0.01

imageGen = True

## 一、載入相關套件

In [2]:
import os
import cv2
import csv
import random
import time
import numpy as np
import pandas as pd
import matplotlib.image as mpimg # mpimg 用於讀取圖片
import matplotlib.pyplot as plt # plt 用於顯示圖片
import seaborn as sns

In [3]:
import tensorflow as tf

from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet_v2 import preprocess_input
from tensorflow.keras import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import utils as np_utils
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Input, Dropout, Flatten, Convolution2D, MaxPooling2D, ZeroPadding2D, Dense, Activation, Conv2DTranspose




In [4]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)

1 Physical GPUs, 1 Logical GPUs


In [5]:
# Print Start time
from datetime import datetime, timezone
import pytz

tpe = pytz.timezone('Asia/Taipei')
utc_start = datetime.now()
print("Start Time", tpe.fromutc(utc_start))

Start Time 2020-05-28 12:34:19.607564+08:00


## 製作標籤&資料集

In [6]:
from imgGen import imgGenFunc

In [7]:
flip = 0; rotate = 0
x_train, y_train = imgGenFunc(image_size, flip, rotate)

芒果圖片數量:  5600
x_l:  5600
y_l:  5600


## 建立Model

In [8]:
# ResNet50V2

In [9]:
model_resnet = ResNet50V2(weights='imagenet', include_top=False)

input = Input( shape=(image_size,image_size,3),name = 'image_input' )
output_conv = model_resnet(input)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [10]:
## 由於該問題是分 3 類，需要在最後修改 Dense(3)，softmax 代表 output 是機率  
#Add the fully-connected layers 
p = Flatten()(output_conv)
p = Dense(256,activation = 'relu')(p)
p = Dropout(0.2)(p)
p = Dense(3, activation='softmax', name='predictions')(p)

model = Model(inputs=input, outputs=p)

model_2 = Model(inputs=input, outputs=p)
model_3 = Model(inputs=input, outputs=p)
model_4 = Model(inputs=input, outputs=p)

In [11]:
adam = optimizers.Adam(lr=lr)
model.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['acc'])


model_2.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['acc'])

model_3.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['acc'])

model_4.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['acc'])

## 訓練

In [12]:
# Model檔名設定
file_name = str(epochs)+'_'+str(batch_size)

In [13]:
# 加入EarlyStopping以及Tensorboard等回調函數
CB = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
TB = keras.callbacks.TensorBoard(log_dir='./log'+"_"+file_name, histogram_freq=1)

In [None]:
### Start Training
history_1 = model.fit(
    x = x_train, 
    y = y_train, 
    batch_size = batch_size,
    shuffle=True,
    epochs = epochs,
    validation_split=0.1
)

Train on 5040 samples, validate on 560 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
1024/5040 [=====>........................] - ETA: 26s - loss: 0.0918 - acc: 0.9688

In [None]:
# Print Finish Training time
tpe = pytz.timezone('Asia/Taipei')
utc_finish = datetime.now()
print("Finish Training Time", tpe.fromutc(utc_finish))

difference = utc_finish-utc_start
seconds_in_day = 24 * 60 * 60
divmod(difference.days * seconds_in_day + difference.seconds, 60)

## 繪製Model學習成效

In [None]:
def plot_learning_curves(history):
    pd.DataFrame(history.history).plot(figsize=(8,5))
    plt.grid(True)
    plt.gca().set_ylim(0,2)
    plt.show()
    
plot_learning_curves(history_1)

## 測試集預測準確度

In [None]:
csvfile_test = open('dev.csv')
reader_test = csv.reader(csvfile_test)

In [None]:
# 讀取csv標籤
labels = []
for line in reader_test:
    tmp = [line[0],line[1]]
    # print tmp
    labels.append(tmp)
csvfile_test.close() 

In [None]:
labels = labels[1:]  # remove the first row
print("芒果圖片數量: ",len(labels))

In [None]:
# 轉換圖片的標籤
for i in range(len(labels)):
    labels[i][1] = labels[i][1].replace("A","0")
    labels[i][1] = labels[i][1].replace("B","1")
    labels[i][1] = labels[i][1].replace("C","2")

In [None]:
x_test = []
y_test = []

for i in range(len(labels)):
    img = cv2.imread("C1-P1_Dev/" + labels[i][0] )
    res = cv2.resize(img,(image_size,image_size),interpolation=cv2.INTER_LINEAR)
    res = img_to_array(res)
    x_test.append(res)    
    y_test.append(labels[i][1])

In [None]:
y_test_org = y_test

# 轉換至array的格式
x_test = np.array(x_test)
y_test = np.array(y_test)

In [None]:
# 轉換至float的格式
for i in range(len(x_test)):
    x_test[i] = x_test[i].astype('float32')
# x_test = x_test/255

In [None]:
# 標籤進行one-hotencoding
y_test = np_utils.to_categorical(y_test)

In [None]:
# 測試集標籤預測
y_pred = model.predict(x_test)

In [None]:
# 模型預測後的標籤
predict_label = np.argmax(y_pred,axis=1)
print(predict_label)
# print(len(predict_label))

In [None]:
# 模型原標籤
true_label = y_test_org
true_label = np.array(true_label)
print(true_label)
# print(len(true_label))

In [None]:
pd.crosstab(true_label,predict_label,rownames=['實際值'],colnames=['預測值'])

In [None]:
# 整體準確度
count = 0
for i in range(len(y_pred)):
    if(np.argmax(y_pred[i]) == np.argmax(y_test[i])): #argmax函数找到最大值的索引，即为其类别
        count += 1
score = count/len(y_pred)
print('正确率为:%.2f%s' % (score*100,'%'))

In [None]:
# 儲存模型相關參數
# model.save('h5/'+file_name+'.h5')

## Test 2

In [None]:
flip = 0; rotate = 1
x_train, y_train = imgGenFunc(image_size, flip, rotate)

In [None]:
# Print Start time
from datetime import datetime, timezone
import pytz

tpe = pytz.timezone('Asia/Taipei')
utc_start = datetime.now()
print("Start Time", tpe.fromutc(utc_start))

In [None]:
### Start Training
history_2 = model_2.fit(
    x = x_train, 
    y = y_train, 
    batch_size = batch_size,
    shuffle=True,
    epochs = epochs,
    validation_split=0.1
)

In [None]:
# Print Finish Training time
tpe = pytz.timezone('Asia/Taipei')
utc_finish = datetime.now()
print("Finish Training Time", tpe.fromutc(utc_finish))

difference = utc_finish-utc_start
seconds_in_day = 24 * 60 * 60
divmod(difference.days * seconds_in_day + difference.seconds, 60)

In [None]:
plot_learning_curves(history_2)

In [None]:
# 測試集標籤預測
y_pred = model_2.predict(x_test)

In [None]:
# 模型預測後的標籤
predict_label = np.argmax(y_pred,axis=1)
print(predict_label)
# print(len(predict_label))

In [None]:
pd.crosstab(true_label,predict_label,rownames=['實際值'],colnames=['預測值'])

In [None]:
# 整體準確度
count = 0
for i in range(len(y_pred)):
    if(np.argmax(y_pred[i]) == np.argmax(y_test[i])): #argmax函数找到最大值的索引，即为其类别
        count += 1
score = count/len(y_pred)
print('正确率为:%.2f%s' % (score*100,'%'))

## Test 3

In [None]:
flip = 1; rotate = 0
x_train, y_train = imgGenFunc(image_size, flip, rotate)

In [None]:
# Print Start time
from datetime import datetime, timezone
import pytz

tpe = pytz.timezone('Asia/Taipei')
utc_start = datetime.now()
print("Start Time", tpe.fromutc(utc_start))

In [None]:
### Start Training
history_3 = model_3.fit(
    x = x_train, 
    y = y_train, 
    batch_size = batch_size,
    shuffle=True,
    epochs = epochs,
    validation_split=0.1
)

In [None]:
# Print Finish Training time
tpe = pytz.timezone('Asia/Taipei')
utc_finish = datetime.now()
print("Finish Training Time", tpe.fromutc(utc_finish))

difference = utc_finish-utc_start
seconds_in_day = 24 * 60 * 60
divmod(difference.days * seconds_in_day + difference.seconds, 60)

In [None]:
plot_learning_curves(history_3)

In [None]:
# 測試集標籤預測
y_pred = model_3.predict(x_test)

In [None]:
# 模型預測後的標籤
predict_label = np.argmax(y_pred,axis=1)
print(predict_label)
# print(len(predict_label))

In [None]:
pd.crosstab(true_label,predict_label,rownames=['實際值'],colnames=['預測值'])

In [None]:
# 整體準確度
count = 0
for i in range(len(y_pred)):
    if(np.argmax(y_pred[i]) == np.argmax(y_test[i])): #argmax函数找到最大值的索引，即为其类别
        count += 1
score = count/len(y_pred)
print('正确率为:%.2f%s' % (score*100,'%'))

## Test 4

In [None]:
flip = 1; rotate = 1
x_train, y_train = imgGenFunc(image_size, flip, rotate)

In [None]:
# Print Start time
from datetime import datetime, timezone
import pytz

tpe = pytz.timezone('Asia/Taipei')
utc_start = datetime.now()
print("Start Time", tpe.fromutc(utc_start))

In [None]:
### Start Training
history_4 = model_4.fit(
    x = x_train, 
    y = y_train, 
    batch_size = batch_size,
    shuffle=True,
    epochs = epochs,
    validation_split=0.1
)

In [None]:
# Print Finish Training time
tpe = pytz.timezone('Asia/Taipei')
utc_finish = datetime.now()
print("Finish Training Time", tpe.fromutc(utc_finish))

difference = utc_finish-utc_start
seconds_in_day = 24 * 60 * 60
divmod(difference.days * seconds_in_day + difference.seconds, 60)

In [None]:
plot_learning_curves(history_4)

In [None]:
# 測試集標籤預測
y_pred = model_4.predict(x_test)

In [None]:
# 模型預測後的標籤
predict_label = np.argmax(y_pred,axis=1)
print(predict_label)
# print(len(predict_label))

In [None]:
pd.crosstab(true_label,predict_label,rownames=['實際值'],colnames=['預測值'])

In [None]:
# 整體準確度
count = 0
for i in range(len(y_pred)):
    if(np.argmax(y_pred[i]) == np.argmax(y_test[i])): #argmax函数找到最大值的索引，即为其类别
        count += 1
score = count/len(y_pred)
print('正确率为:%.2f%s' % (score*100,'%'))