In [None]:
import glob
import os
from PIL import Image, ImageOps
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import pandas as pd
import matplotlib.pyplot as plt
import time

In [None]:
'''
I data engineering 
'''
# Side length (pixels) of the square images fed to the network.
# 48, 75 and 100 were also tried during experimentation.
imgs_size = 128

# Training images live in one sub-directory per species; the directory name is
# the class label. Sorting makes the file order (and therefore the seeded
# train/validation split below) independent of the filesystem's listing order.
z = sorted(glob.glob('../input/plant-seedlings-classification/train/*/*.png'))
if not z:
    raise FileNotFoundError(
        'no training images found under ../input/plant-seedlings-classification/train/')

ori_label = []  # class label of every image, in the same order as ori_imgs
ori_imgs = []   # PIL images, centre-cropped/resized to imgs_size x imgs_size, RGB
for fn in z:
    # Parent directory name == species label (os.path handles any separator).
    ori_label.append(os.path.basename(os.path.dirname(fn)))
    # The context manager closes the underlying file as soon as the resized
    # copy has been produced, instead of leaking one open handle per image.
    with Image.open(fn) as new_img:
        # Image.LANCZOS is the filter formerly aliased as Image.ANTIALIAS
        # (the alias was removed in Pillow 10).
        ori_imgs.append(
            ImageOps.fit(new_img, (imgs_size, imgs_size), Image.LANCZOS).convert('RGB'))

# Stack into one (n_images, imgs_size, imgs_size, 3) array and scale 0..255 -> 0..1.
imgs = np.array([np.array(im) for im in ori_imgs])
imgs = imgs.reshape(imgs.shape[0], imgs_size, imgs_size, 3) / 255

# Encode the string labels as indicator rows; `lb` is kept so predictions can
# be mapped back to species names later.
lb = LabelBinarizer().fit(ori_label)
label = lb.transform(ori_label)

# Hold out 15% of the images for validation (fixed seed for repeatability).
trainX, validX, trainY, validY = train_test_split(imgs, label, test_size=0.15, random_state=20)

In [None]:
'''
II creating model
'''
running_time = time.time()  # wall-clock start; read again by the timing cell further down
from keras.layers import Dropout, Input, Dense, Activation,GlobalMaxPooling2D, BatchNormalization, Flatten, Conv2D, MaxPooling2D
from keras.models import Model, load_model
from keras.optimizers import Adam
import keras


def _conv_bn_relu(tensor, filters, kernel_size):
    """Apply Conv2D -> BatchNormalization -> ReLU to `tensor` and return the result.

    The convolution is created with only `filters` and `kernel_size`, so it
    uses the Keras defaults for everything else (stride 1, 'valid' padding):
    each call shrinks height and width by ``kernel_size - 1``.
    Batch normalisation is applied over axis 3 (the channel axis of the
    channels-last input declared below).
    """
    tensor = Conv2D(filters, kernel_size)(tensor)
    tensor = BatchNormalization(axis=3)(tensor)
    return Activation('relu')(tensor)


# Spatial sizes in the comments below are for imgs_size = 128.
IM_input = Input((imgs_size, imgs_size, 3))

# Stage 1: two 5x5 convolutions with 64 filters, then 2x2 max-pooling
# (stride 2, 'same' padding).  128 -> 124 -> 120 -> 60
IM = _conv_bn_relu(IM_input, 64, (5, 5))
IM = _conv_bn_relu(IM, 64, (5, 5))
IM = MaxPooling2D((2, 2), strides=(2, 2), padding='same')(IM)

# Stage 2: two 5x5 convolutions with 128 filters, then 2x2 max-pooling.
# 60 -> 56 -> 52 -> 26
IM = _conv_bn_relu(IM, 128, (5, 5))
IM = _conv_bn_relu(IM, 128, (5, 5))
IM = MaxPooling2D((2, 2), strides=(2, 2), padding='same')(IM)

# Stage 3: two 5x5 convolutions with 256 filters, then 3x3 max-pooling
# (stride 2, 'same' padding).  26 -> 22 -> 18 -> 9
IM = _conv_bn_relu(IM, 256, (5, 5))
IM = _conv_bn_relu(IM, 256, (5, 5))
IM = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(IM)

# Stage 4: two 2x2 convolutions with 512 filters, no pooling.  9 -> 8 -> 7
IM = _conv_bn_relu(IM, 512, (2, 2))
IM = _conv_bn_relu(IM, 512, (2, 2))

# Collapse each of the 512 feature maps to its maximum value.
IM = GlobalMaxPooling2D()(IM)

# Classifier head: 128 -> dropout(0.1) -> 64 -> 12-way softmax (one unit per class).
IM = Dense(128, activation='relu')(IM)
IM = Dropout(0.1)(IM)
IM = Dense(64, activation='relu')(IM)
IM = Dense(12, activation='softmax')(IM)

model = Model(inputs=IM_input, outputs=IM)
model.summary()

# NOTE(review): newer Keras releases spell this argument `learning_rate`;
# `lr` is kept to match the Keras version this notebook was written for - confirm.
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=1e-3), metrics=['acc'])

In [None]:
'''
III training model
'''
from keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint

batch_size = 30   # samples per gradient step
epochs = 200      # upper bound on training epochs; early stopping may end the run sooner


def _decayed_lr(epoch_index):
    """Return the learning rate for a 0-based epoch index: 1e-3 multiplied by 0.9 per epoch."""
    return 1e-3 * 0.9 ** epoch_index


# Sets the optimiser's learning rate from `_decayed_lr` and logs it (verbose=1).
learning_rate = LearningRateScheduler(_decayed_lr, verbose=1)

# Stops training after 30 epochs without improvement of the monitored quantity
# (Keras' default monitor is used, since none is given).
earlystop = EarlyStopping(patience=30)

# Writes the model to 'model.h5', keeping only the best one seen so far.
modelsave = ModelCheckpoint(filepath='model.h5', save_best_only=True, verbose=1)

training_callbacks = [learning_rate, earlystop, modelsave]
model_value = model.fit(
    trainX,
    trainY,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(validX, validY),
    callbacks=training_callbacks,
)

In [None]:
'''
IV augmentation
'''
# Random transformations applied on the fly to every training batch.
augmentation_options = dict(
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest",
)
aug = keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

# Endless stream of augmented (images, labels) batches drawn from the training split.
augmented_batches = aug.flow(trainX, trainY, batch_size=batch_size)

# Continue training the already-fitted `model` on the augmented stream, reusing
# the same callback objects as the plain training run. Validation still uses
# the un-augmented hold-out set.
# NOTE(review): `fit_generator` is deprecated in recent TensorFlow/Keras
# releases in favour of `fit`; kept to match the Keras version this notebook targets.
model_value_aug = model.fit_generator(
    augmented_batches,
    steps_per_epoch=len(trainX) // batch_size,  # one pass over the training set per epoch
    epochs=epochs,
    validation_data=(validX, validY),
    callbacks=[learning_rate, earlystop, modelsave],
)

In [None]:
'''
V testing
'''
# Test images sit directly in the test directory (no per-class sub-folders).
z = glob.glob('../input/plant-seedlings-classification/test/*.png')
if not z:
    raise FileNotFoundError(
        'no test images found under ../input/plant-seedlings-classification/test/')

test_imgs = []  # preprocessed PIL images, same order as `names`
names = []      # bare file names, used as the 'file' column of the submission
for fn in z:
    names.append(os.path.basename(fn))
    # Same preprocessing as for training. The context manager closes the file
    # once the resized copy exists; Image.LANCZOS is the filter formerly
    # aliased as Image.ANTIALIAS (alias removed in Pillow 10).
    with Image.open(fn) as new_img:
        test_img = ImageOps.fit(new_img, (imgs_size, imgs_size), Image.LANCZOS).convert('RGB')
    test_imgs.append(test_img)

# Reload the checkpoint written by ModelCheckpoint rather than using the
# in-memory model from the last training epoch.
model = load_model('model.h5')

# Stack into (n_images, imgs_size, imgs_size, 3) and scale 0..255 -> 0..1,
# matching the training inputs.
timgs = np.array([np.array(im) for im in test_imgs])
testX = timgs.reshape(timgs.shape[0], imgs_size, imgs_size, 3) / 255

prediction = model.predict(testX)  # one row of class scores per image
# Map each score row back to a species name using the binarizer fitted on the
# training labels (for multiclass input it selects the highest-scoring class).
test_y = lb.inverse_transform(prediction)

In [None]:
# `running_time` holds the start timestamp taken in the model-creation cell.
# Store the elapsed time under a separate name so the start value is not
# overwritten and re-running this cell still reports a meaningful duration.
elapsed_seconds = time.time() - running_time
print(elapsed_seconds)

In [None]:
'''
VI output result
'''
# One row per test image: file name and predicted species.
df = pd.DataFrame({'file': names, 'species': test_y})
# Order rows by file name, then write the table without the index column.
df_sort = df.sort_values('file')
df_sort.to_csv(path_or_buf='results.csv', index=False)

In [None]:
'''
VII plotting picture for training
'''
history = model_value.history

# Use the number of epochs that actually ran: early stopping can end training
# before the configured maximum, so a hard-coded epoch count would not line up
# with the recorded metrics.
epochs_run = len(history['loss'])

# One row per completed epoch; 'Epochs' is the 1-based epoch number.
train_model = pd.DataFrame({
    'acc': history['acc'],
    'loss': history['loss'],
    'val_acc': history['val_acc'],
    'val_loss': history['val_loss'],
    'Epochs': list(range(1, epochs_run + 1)),
})

# Draw all four curves on one explicit axes, against the epoch number.
fig, ax = plt.subplots(figsize=(16, 10))
for column, curve_label in [
    ('acc', 'Train accuracy'),
    ('loss', 'Train loss'),
    ('val_acc', 'Validation accuracy'),
    ('val_loss', 'Validation loss'),
]:
    ax.plot(train_model['Epochs'], train_model[column], label=curve_label)

ax.legend()
ax.grid(True)
ax.tick_params(labelsize=15)
ax.set_xlabel('Epochs', fontsize=15)  # x axis
ax.set_ylabel('Rate', fontsize=15)  # y axis
ax.set_title('Training model', fontsize=20)  # figure title
plt.show()


In [None]:
'''
VIII plotting picture for augmentation
'''
history = model_value_aug.history

# Use the number of epochs that actually ran: early stopping can end training
# before the configured maximum, so a hard-coded epoch count would not line up
# with the recorded metrics.
epochs_run = len(history['loss'])

# One row per completed epoch; 'Epochs' is the 1-based epoch number.
train_model = pd.DataFrame({
    'acc': history['acc'],
    'loss': history['loss'],
    'val_acc': history['val_acc'],
    'val_loss': history['val_loss'],
    'Epochs': list(range(1, epochs_run + 1)),
})

# Draw all four curves on one explicit axes, against the epoch number.
fig, ax = plt.subplots(figsize=(16, 10))
for column, curve_label in [
    ('acc', 'Train accuracy'),
    ('loss', 'Train loss'),
    ('val_acc', 'Validation accuracy'),
    ('val_loss', 'Validation loss'),
]:
    ax.plot(train_model['Epochs'], train_model[column], label=curve_label)

ax.legend()
ax.grid(True)
ax.tick_params(labelsize=15)
ax.set_xlabel('Epochs', fontsize=15)  # x axis
ax.set_ylabel('Rate', fontsize=15)  # y axis
ax.set_title('Augmentation Training model', fontsize=20)  # figure title
plt.show()
