In [1]:
# Hyperparameters for the notebook.
batch_size = 64
epochs = 30
image_size = 128  # images are resized to image_size x image_size before prediction
lr = 0.01

## 一、載入相關套件

In [2]:
import os
import cv2
import csv
import random
import time
import numpy as np
import pandas as pd
import matplotlib.image as mpimg # mpimg 用於讀取圖片
import matplotlib.pyplot as plt # plt 用於顯示圖片
import seaborn as sns

In [3]:
import tensorflow as tf

from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input
from tensorflow.keras import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import utils as np_utils
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.layers import Input, Dropout, Flatten, Convolution2D, MaxPooling2D, ZeroPadding2D, Dense, Activation, Conv2DTranspose

In [4]:
# Configure TensorFlow's GPU usage: enable memory growth on every physical GPU,
# then make only the first GPU visible and report the device counts.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Restrict TensorFlow to the first physical GPU only.
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        # The error is only printed, so the notebook keeps running afterwards.
        print(e)

1 Physical GPUs, 1 Logical GPUs


In [5]:
# Print the notebook start time converted to Taipei local time.
from datetime import datetime, timezone
import pytz

tpe = pytz.timezone('Asia/Taipei')
# datetime.now() without a tz argument returns the host's *local* wall-clock
# time, which is only UTC when the machine itself runs in UTC. Request UTC
# explicitly so that tpe.fromutc() converts from the right reference, then
# drop the tzinfo so utc_start remains a naive datetime as before.
utc_start = datetime.now(timezone.utc).replace(tzinfo=None)
print("Start Time", tpe.fromutc(utc_start))

Start Time 2020-06-10 01:47:00.783776+08:00


## 測試集預測準確度

In [6]:
# Open the label CSV and wrap it in a csv reader; the rows are consumed (and the
# file closed) by the following cell.
# newline='' is what the csv module requires for file objects so that newlines
# embedded in quoted fields are handled correctly.
csvfile_test = open('dev.csv', newline='')
reader_test = csv.reader(csvfile_test)

In [7]:
# Read the CSV labels: keep the first two columns of every row (the first row is
# still included here) as a [column0, column1] pair, then release the file handle.
labels = [[row[0], row[1]] for row in reader_test]
csvfile_test.close()

In [8]:
# Discard the first row read from the CSV and report how many records remain.
# NOTE(review): this cell is not idempotent - every re-run drops one more row,
# so re-execute the CSV-reading cells first if it has to be run again.
labels = labels[1:]  # remove the first row
print("芒果圖片數量: ",len(labels))

芒果圖片數量:  800


In [9]:
# Convert the image labels in place: letter grades become numeric class strings
# (A -> "0", B -> "1", C -> "2").
grade_to_class = str.maketrans({"A": "0", "B": "1", "C": "2"})
for record in labels:
    record[1] = record[1].translate(grade_to_class)

In [10]:
# Load every image listed in `labels`, resize it to image_size x image_size and
# collect the pixel arrays (x_test) together with their class strings (y_test).
x_test = []
y_test = []

for file_name, grade in labels:
    image_path = "C1-P1_Dev/" + file_name
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None instead
        # of raising; fail here with the offending path rather than with an
        # opaque error inside cv2.resize.
        raise FileNotFoundError("Could not read image: " + image_path)
    res = cv2.resize(img, (image_size, image_size), interpolation=cv2.INTER_LINEAR)
    res = img_to_array(res)
    x_test.append(res)
    y_test.append(grade)

In [11]:
# Keep a reference to the plain list of label strings; it is compared against
# the predictions when accuracy is computed later.
y_test_org = y_test

# Convert both lists to NumPy arrays.
x_test, y_test = np.array(x_test), np.array(y_test)

In [12]:
# Convert the image batch to float32.
# Assigning x_test[i] = x_test[i].astype(...) element by element cannot change
# the dtype of the enclosing ndarray, so the per-image loop had no effect; cast
# the whole array instead. copy=False avoids a copy when it is already float32.
x_test = x_test.astype('float32', copy=False)
# Scaling to [0, 1] is disabled:
# x_test = x_test/255

In [13]:
# One-hot encode the labels.
# NOTE(review): y_test is overwritten in place, so re-running this cell would
# encode the already-encoded array again - re-run the cells above first.
y_test = np_utils.to_categorical(y_test)

In [14]:
# Predict the dev-set labels with every member of the ensemble.
# The saved models are read from "1.h5" ... "<model_count>.h5" in the working
# directory.
model_count = 10

# model[k] is the Keras model loaded from "<k+1>.h5".
model = [
    tf.keras.models.load_model(str(idx) + ".h5")
    for idx in range(1, model_count + 1)
]

# y_pred[k] is the output of model[k].predict(x_test).
y_pred = [member.predict(x_test) for member in model]

In [69]:
# Labels predicted by the ensemble.
# Each model's output is reduced to one class index per sample (argmax over
# axis 1), then the per-model labels are combined into predict_label.
predict_labels = [np.argmax(y_pred[k], axis=1) for k in range(model_count)]

avg_or_vote = 1  # 0: average the class indices, 1: majority vote

# One row per model, one column per sample.
stacked_labels = np.stack(predict_labels)

if avg_or_vote == 0:
    # Sum into a fresh array. The previous code aliased predict_labels[0] and
    # accumulated into it with +=, which silently overwrote the first model's
    # labels inside predict_labels.
    predict_label = np.round(stacked_labels.sum(axis=0) / model_count, 0)
else:
    # Majority vote per sample; np.argmax returns the first maximum, so ties go
    # to the smallest class index.
    predict_label = np.array([
        np.argmax(np.bincount(sample_votes))
        for sample_votes in stacked_labels.T
    ])


In [70]:
# Original (ground-truth) labels as a NumPy array of class strings.
true_label = np.array(y_test_org)
print(true_label)
# print(len(true_label))

['1' '0' '1' '1' '2' '1' '2' '0' '1' '0' '0' '1' '2' '1' '2' '1' '1' '0'
 '0' '1' '0' '0' '2' '0' '1' '0' '2' '2' '1' '1' '1' '0' '2' '2' '0' '0'
 '0' '0' '0' '0' '1' '2' '0' '2' '2' '2' '1' '0' '2' '0' '1' '1' '1' '2'
 '2' '0' '1' '0' '2' '0' '1' '1' '2' '0' '1' '2' '2' '0' '2' '1' '2' '1'
 '1' '2' '2' '2' '1' '0' '2' '0' '0' '2' '1' '1' '2' '2' '2' '2' '0' '1'
 '2' '2' '1' '2' '2' '2' '1' '0' '2' '2' '2' '1' '1' '2' '1' '1' '1' '1'
 '1' '2' '2' '0' '1' '0' '0' '2' '2' '0' '2' '0' '1' '0' '2' '1' '2' '1'
 '0' '1' '2' '1' '0' '0' '0' '1' '0' '2' '1' '1' '0' '2' '0' '1' '2' '0'
 '0' '0' '2' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '0' '2' '1' '2' '1'
 '0' '1' '2' '2' '1' '2' '0' '2' '1' '2' '2' '2' '2' '1' '0' '0' '0' '1'
 '2' '2' '0' '2' '1' '2' '0' '0' '2' '1' '2' '2' '1' '1' '2' '1' '0' '2'
 '1' '0' '2' '2' '1' '0' '1' '2' '0' '1' '2' '2' '2' '1' '1' '2' '0' '2'
 '1' '2' '1' '1' '0' '1' '1' '1' '1' '1' '0' '0' '1' '1' '0' '2' '1' '1'
 '0' '1' '2' '0' '2' '0' '1' '2' '2' '0' '1' '0' '2

In [71]:
# Confusion matrix: rows are the actual classes, columns the predicted classes.
pd.crosstab(
    true_label,
    predict_label,
    rownames=['實際值'],
    colnames=['預測值'],
)

預測值,0,1,2
實際值,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,205,32,6
1,54,199,40
2,4,34,226


In [72]:
# Overall accuracy: the share of samples whose ensemble label matches the true
# label. Both sides go through int() because true_label holds digit strings and
# the averaging mode produces floats.
count = sum(
    int(predicted) == int(actual)
    for predicted, actual in zip(predict_label, true_label)
)
score = count / len(predict_label)
print('正确率为:%.2f%s' % (score*100,'%'))

正确率为:78.75%


In [73]:
# 儲存模型相關參數
#model.save('4.h5')