# Biến toàn cục

In [None]:
# Shared directory prefix for every input path used in this notebook
_data_root = '../input/plant-pathology-2020-fgvc7/'

train_csv = _data_root + 'train.csv'   # CSV read into pd_train
test_csv = _data_root + 'test.csv'     # CSV read into pd_test
img_path = _data_root + 'images/'      # folder prefix prepended to image file names
img_format = '.jpg'                    # extension appended to each image_id

# Tìm hiểu về bộ data

In [None]:
import pandas as pd
import cv2
from random import randrange
import matplotlib.pyplot as plt

# Read both CSV files into DataFrames
pd_train = pd.read_csv(train_csv)
pd_test = pd.read_csv(test_csv)

# Print the first rows of each table, then each table's (rows, columns) shape
for preview in (pd_train.head(), pd_test.head(), pd_train.shape, pd_test.shape):
    print(preview)

# Show 10 randomly chosen training images side by side
_, ax = plt.subplots(1, 10, figsize=(15, 15))
for i in range(10):
    # Pick a random row position and build the image file name from its image_id
    img_name = pd_train['image_id'].iloc[randrange(pd_train.shape[0])] + img_format
    img = cv2.imread(img_path + img_name)
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be read
        raise FileNotFoundError(f'Cannot read image: {img_path + img_name}')
    print('{0} has size {1}'.format(img_name, img.shape))
    ax[i].set_axis_off()
    # OpenCV decodes to BGR while matplotlib expects RGB; convert so colours display correctly
    ax[i].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))


Có tổng cộng 1821 bức ảnh 

Vậy mỗi bức ảnh có kích thước 1365 x 2048 và có 3 channels

Output sẽ là một one-hot vector 4 chiều lần lượt là `[healthy  multiple_diseases  rust  scab]`

# Load dữ liệu

In [None]:
import numpy as np
def load_image(image_file, size=(256, 256)):
    """Read an image from disk and return it as a resized RGB array.

    Args:
        image_file: Path of the image file to read.
        size: Target size forwarded to ``cv2.resize`` as ``dsize``
            (OpenCV interprets it as ``(width, height)``).

    Returns:
        The image converted from BGR to RGB channel order and resized to ``size``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_file`` (missing or unreadable).
    """
    image = cv2.imread(image_file)
    if image is None:
        # cv2.imread signals failure by returning None; fail here with a clear message
        # instead of letting cvtColor raise an opaque assertion error.
        raise FileNotFoundError(f'Cannot read image (missing or unreadable): {image_file}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return cv2.resize(image, dsize=size)
# Full path of every training image, built from the shared folder prefix and extension settings
train_paths = img_path + pd_train['image_id'].values + img_format
# Load every image with the default load_image size, stack into one float32 array
# and divide by 255 to scale the pixel values down
X = np.array([load_image(path) for path in train_paths], dtype=np.float32) / 255
# Labels: columns 1..4 of the training table (every column after the first, image_id)
Y = pd_train.iloc[:, 1:5].values

# Tách dữ liệu thành tập train và tập validation

In [None]:
from sklearn.model_selection import train_test_split
# Split images and labels into a training part and a validation part.
# The fixed random_state makes the split reproducible between runs.
(X_train, X_valid,
 y_train, y_valid) = train_test_split(
    X,
    Y,
    random_state=15,
)

In [None]:
# Import from tensorflow.keras so the generator comes from the same Keras implementation
# as the model, which is built with tensorflow.keras.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random augmentation applied to the batches drawn from this generator
imagegen = ImageDataGenerator(
    rotation_range=20,        # random rotations within this range
    zoom_range=0.2,           # random zoom
    width_shift_range=0.2,    # random horizontal shift
    height_shift_range=0.2,   # random vertical shift
    horizontal_flip=True,     # random left/right flips
    vertical_flip=True        # random up/down flips
)

# Build model

In [None]:
from tensorflow.keras import layers, models
def _strided_conv(filters, **extra):
    """Return a 3x3, stride-2, same-padded ReLU convolution with `filters` output channels.

    Any `extra` keyword arguments (e.g. `input_shape`) are forwarded to `layers.Conv2D`.
    """
    return layers.Conv2D(
        filters,
        (3, 3),
        padding='same',
        bias_initializer='zeros',
        kernel_initializer='glorot_uniform',
        strides=2,
        activation='relu',
        **extra
    )


model = models.Sequential([
    # Stage 1: two 35-filter convolutions on the 256x256x3 input, dropout, 2x2 max-pooling
    _strided_conv(35, input_shape=(256, 256, 3)),
    _strided_conv(35),
    layers.Dropout(0.25),
    layers.MaxPooling2D((2, 2)),

    # Stage 2: two 35-filter convolutions, dropout, 5x5 max-pooling
    _strided_conv(35),
    _strided_conv(35),
    layers.Dropout(0.25),
    layers.MaxPooling2D((5, 5)),

    # Stage 3: two 50-filter convolutions, dropout, global max-pooling
    _strided_conv(50),
    _strided_conv(50),
    layers.Dropout(0.25),
    layers.GlobalMaxPool2D(),

    # Classifier head ending in a 4-way softmax
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.25),
    layers.BatchNormalization(),
    layers.Dense(4, activation='softmax'),
])

model.summary()
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

# Thực hiện train

In [None]:
# Callbacks must come from the same Keras implementation as the model (tensorflow.keras);
# mixing in standalone `keras` callbacks can fail at fit time.
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

# Reduce the learning rate after 15 epochs without improvement of the monitored metric
rlr = ReduceLROnPlateau(patience=15, verbose=1)
# Stop after 35 epochs without improvement and roll back to the best weights
es = EarlyStopping(patience=35, restore_best_weights=True, verbose=1)
# Keep only the best model seen so far on disk
mc = ModelCheckpoint('model.hdf5', save_best_only=True, verbose=0)

# Single source of truth for the batch size used by both the generator and steps_per_epoch
batch_size = 32

# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(
    imagegen.flow(X_train, y_train, batch_size=batch_size),
    epochs=400,
    steps_per_epoch=X_train.shape[0] // batch_size,
    verbose=0,
    callbacks=[rlr, es, mc],
    validation_data=(X_valid, y_valid)
)

# Vẽ đồ thị quá trình train

In [None]:
# Per-epoch metrics recorded by Keras during training
h = history.history

# One panel per metric: loss on the left, accuracy on the right.
# (Previously only the right-hand panel was drawn, leaving half the figure empty.)
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(20, 6))

for axis, key, y_label in ((ax_loss, 'loss', 'loss'), (ax_acc, 'acc', 'accuracy')):
    axis.plot(h[key], label='train')
    axis.plot(h['val_' + key], label='val')
    axis.set_xlabel('epochs')
    axis.set_ylabel(y_label)
    axis.legend()

plt.show()

# Dùng mô hình đã được train để chạy test

In [None]:
# Load with tensorflow.keras, the same Keras implementation the model was built with
from tensorflow.keras.models import load_model

# Reload the checkpoint file written by ModelCheckpoint during training
model = load_model('model.hdf5')

# Build test image paths from the shared folder prefix and extension settings,
# then load and scale the images exactly like the training images
test_paths = img_path + pd_test['image_id'].values + img_format
test_images = np.array([load_image(path) for path in test_paths], dtype=np.float32) / 255

pred_test = model.predict(test_images)

# One row per test image: the image id followed by the four softmax outputs in column order
res = pd.DataFrame({
    'image_id': pd_test['image_id'].values,
    'healthy': pred_test[:, 0],
    'multiple_diseases': pred_test[:, 1],
    'rust': pred_test[:, 2],
    'scab': pred_test[:, 3],
})
res.to_csv('submission.csv', index=False)