In [1]:
import cv2
import numpy as np
import os

from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.models import Model, Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from resnet_builder import resnet # 這是從 resnet_builder.py 中直接 import 撰寫好的 resnet 函數
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam

In [2]:
train_path = './2021-ml-100-marathon-final-exam/image_data/train'
test_path = './2021-ml-100-marathon-final-exam/image_data/test'

In [3]:
os.listdir(train_path)
num_classes = len(os.listdir(train_path))

In [4]:
x = [] 
y = []

for idx, flower in enumerate(os.listdir(train_path)):
    for filename in os.listdir(f'{train_path}/{flower}'):
        y.append([idx])
        img = cv2.imread(f'{train_path}/{flower}/{filename}')
        img = cv2.resize(img, (100, 100))
        x.append(img)

In [5]:
x = np.array(x)
y = np.array(y)

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=777, train_size=0.8)

In [6]:
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

x_train = x_train / 255.
x_test = x_test / 255.
y_train = to_categorical(y_train, num_classes) # 類別的數量，train 共有 5 個類別
y_test = to_categorical(y_test, num_classes)

In [7]:
x_train.shape

(2258, 100, 100, 3)

In [8]:
y_train.shape

(2258, 5)

In [9]:
# 建立 ImageDataGenerator，並指定我們要做資料增強的數值範圍
data_generator = ImageDataGenerator(
    rotation_range=40, # 角度值，0~180，影象旋轉
    width_shift_range=0.2, # 水平平移，相對總寬度的比例
    height_shift_range=0.2, # 垂直平移，相對總高度的比例
    shear_range=0.2, # 隨機錯切換角度
    zoom_range=0.2, # 隨機縮放範圍
    horizontal_flip=True, # 一半影象水平翻轉
    fill_mode='nearest' # 填充新建立畫素的方法
)

In [10]:
# 建立 ResNet 模型
# model = resnet(input_shape=_train.shape[1:]) 
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 100, 100, 32)      896       
                                                                 
 activation (Activation)     (None, 100, 100, 32)      0         
                                                                 
 conv2d_1 (Conv2D)           (None, 98, 98, 32)        9248      
                                                                 
 activation_1 (Activation)   (None, 98, 98, 32)        0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 49, 49, 32)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 49, 49, 32)        0         
                                                        

In [11]:
batch_size = 64 # batch 的大小，如果出現 OOM error，請降低這個值
num_classes = 5 # 類別的數量
epochs = 30 # 訓練整個資料集共 30個循環

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])

model.fit_generator(data_generator.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch=int(len(x_train)/batch_size), 
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))

score = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

  model.fit_generator(data_generator.flow(x_train, y_train, batch_size=batch_size),


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test loss: 0.7207194566726685
Test accuracy: 0.7274336218833923


In [18]:
test_img = []
test_filenames = []

for filename in os.listdir(test_path):
    test_filenames.append(filename)
    img = cv2.imread(f'{test_path}/{filename}')
    img = cv2.resize(img, (100, 100))
    test_img.append(img)

In [19]:
test_img = np.array(test_img)
test_img.shape

(2000, 100, 100, 3)

In [20]:
test_img = test_img / 255.
pred = model.predict(test_img)
pred = np.argmax(pred, axis=1)
pred

array([2, 0, 4, ..., 0, 3, 4], dtype=int64)

In [21]:
import csv
with open('Day_101_final_exam.csv','w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['id', 'flower_class'])
    for f, p in zip(test_filenames, pred):
        writer.writerow([f[:-4], p])