In [None]:
import cv2
from glob import glob
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd

In [None]:
# --- Configuration ---------------------------------------------------------
scale_to = 100  # side length (in pixels) every image is resized to
num_of_seed = 7  # seed used to make the train/test split repeatable

file_path = '/kaggle/input/plant-seedlings-classification/train/*/*.png'
# glob() yields paths in an arbitrary, filesystem-dependent order. Sorting keeps
# the sample order stable so that the seeded split is actually reproducible.
train_images = sorted(glob(file_path))

In [None]:
# Show how many files were found plus a short preview, rather than dumping
# every path into the cell output.
print(len(train_images), "training images found")
train_images[:5]

In [None]:
train_img_array = []
train_label_array = []
num = len(train_images)

# Read every training image, resize it and record its species label.
for count, img in enumerate(train_images, start=1):
    print(str(count) + "/" + str(num), end="\r")

    raw_img = cv2.imread(img)
    if raw_img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # stop here with the offending path rather than inside cv2.resize.
        raise IOError("Could not read image: " + str(img))
    train_img_array.append(cv2.resize(raw_img, (scale_to, scale_to)))

    # The species is the parent directory name (.../train/<species>/<file>.png).
    # Indexing from the end keeps this independent of how deep the dataset
    # root is mounted.
    train_label_array.append(img.split('/')[-2])

# NOTE: `train_images` is rebound here from the list of paths to the pixel
# array, so this cell cannot be re-run without first re-running the glob cell.
train_images = np.asarray(train_img_array)  # Train images set
train_labels = pd.DataFrame(train_label_array)  # Train labels set

In [None]:
# Preview the first (up to) eight resized training images.
# OpenCV stores channels as BGR while matplotlib expects RGB, so the channel
# order is converted for display only; the stored arrays stay untouched.
fig, axes = plt.subplots(2, 4, figsize=(12, 6))
for ax, sample in zip(axes.ravel(), train_images[:8]):
    ax.imshow(cv2.cvtColor(sample, cv2.COLOR_BGR2RGB))
    ax.axis('off')

In [None]:
# Segmentation parameters do not change between images, so build them once.
lower_green = (25, 41, 50)    # lower HSV bound of the colour range kept as "plant"
upper_green = (74, 255, 255)  # upper HSV bound of that range
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))

cleaned_train_images = []
show_samples = True  # plot the intermediate stages for the first image only
for img in train_images:
    # Smooth the image before thresholding
    blur_img = cv2.GaussianBlur(img, (5, 5), 0)

    # Threshold in HSV space
    hsvImg = cv2.cvtColor(blur_img, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsvImg, lower_green, upper_green)
    # Morphological closing fills small holes inside the mask
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

    # Boolean mask used for indexing
    bMask = mask > 0

    # Copy only the masked pixels onto a black canvas
    cleaned = np.zeros_like(img, np.uint8)
    cleaned[bMask] = img[bMask]

    cleaned_train_images.append(cleaned)  # Append image without background

    # Show the pipeline stages once
    if show_samples:
        # BGR images are converted to RGB for matplotlib; the HSV array and
        # the masks are plotted as they are.
        stages = [
            ("Original", cv2.cvtColor(img, cv2.COLOR_BGR2RGB)),
            ("Blurred", cv2.cvtColor(blur_img, cv2.COLOR_BGR2RGB)),
            ("HSV (raw channels)", hsvImg),
            ("Mask", mask),
            ("Boolean mask", bMask),
            ("Background removed", cv2.cvtColor(cleaned, cv2.COLOR_BGR2RGB)),
        ]
        for position, (stage_title, stage_img) in enumerate(stages, start=1):
            plt.subplot(2, 3, position)
            plt.imshow(stage_img)
            plt.title(stage_title)
        show_samples = False

cleaned_train_img = np.asarray(cleaned_train_images)

In [None]:
# Preview the first (up to) eight background-free training images.
# The arrays are in OpenCV's BGR order; convert to RGB for matplotlib.
fig, axes = plt.subplots(2, 4, figsize=(12, 6))
for ax, sample in zip(axes.ravel(), cleaned_train_img[:8]):
    ax.imshow(cv2.cvtColor(sample, cv2.COLOR_BGR2RGB))
    ax.axis('off')

In [None]:
# Scale pixel values from [0, 255] to [0, 1]. Casting to float32 first avoids
# the float64 array that a plain `/ 255` on uint8 data would allocate, which
# takes twice the memory.
cleaned_train_imgs = cleaned_train_img.astype(np.float32) / 255

In [None]:
from keras.utils import np_utils
from sklearn import preprocessing
import matplotlib.pyplot as plt

# Map each species name to an integer id
le = preprocessing.LabelEncoder()
encode_train_labels = le.fit_transform(train_labels[0])
print("Classes: " + str(le.classes_))

# One-hot encode the integer ids; the number of columns is the class count
cleaned_train_label = np_utils.to_categorical(encode_train_labels)
num_classes = cleaned_train_label.shape[1]
print("num of classes: " + str(num_classes))

# Bar chart with the number of images per species
train_labels[0].value_counts().plot(kind='bar')

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples, stratified on the one-hot labels and seeded
# with `num_of_seed`.
trainX, testX, trainY, testY = train_test_split(
    cleaned_train_imgs,
    cleaned_train_label,
    test_size=0.2,
    random_state=num_of_seed,
    stratify=cleaned_train_label,
)

In [None]:
from keras.preprocessing.image import ImageDataGenerator
# Random geometric augmentation applied to every training batch.
data_gen = ImageDataGenerator(
        rotation_range=180,  # randomly rotate images in the range
        zoom_range = 0.1, # Randomly zoom image
        width_shift_range=0.1,  # randomly shift images horizontally
        height_shift_range=0.1,  # randomly shift images vertically
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=True  # randomly flip images vertically
    )
# No data_gen.fit(trainX) call: fit() only computes data-dependent statistics
# for featurewise_center / featurewise_std_normalization / zca_whitening.
# None of those are enabled here, so the call would only copy the training
# array. Add it back if one of those options is switched on.

In [None]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers import BatchNormalization

# Seed NumPy's global RNG. (`numpy.random.num_of_seed` does not exist and
# raised AttributeError.) Backend RNGs may need their own seed for fully
# repeatable training.
numpy.random.seed(num_of_seed)

model = Sequential()

# Three convolutional stages with 64, 128 and 256 filters, each laid out as
# Conv -> BatchNorm -> Conv -> MaxPool -> BatchNorm -> Dropout.
for stage_index, n_filters in enumerate((64, 128, 256)):
    if stage_index == 0:
        # Only the very first layer declares the input shape
        model.add(Conv2D(filters=n_filters, kernel_size=(5, 5),
                         input_shape=(scale_to, scale_to, 3), activation='relu'))
    else:
        model.add(Conv2D(filters=n_filters, kernel_size=(5, 5), activation='relu'))
    model.add(BatchNormalization(axis=3))
    model.add(Conv2D(filters=n_filters, kernel_size=(5, 5), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(BatchNormalization(axis=3))
    model.add(Dropout(0.1))

model.add(Flatten())

# Two fully connected stages: Dense -> BatchNorm -> Dropout
for _ in range(2):
    model.add(Dense(256, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

# One softmax output per species
model.add(Dense(num_classes, activation='softmax'))

model.summary()

# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, CSVLogger

# Shrink the learning rate when validation accuracy stops improving
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=3,
                                            verbose=1,
                                            factor=0.4,
                                            min_lr=0.00001)

# Checkpoint paths get their own names so that `file_path` (the training-image
# glob pattern) is not overwritten by this cell.
best_weights_path = "/kaggle/working/weights.best_{epoch:02d}-{val_accuracy:.2f}.hdf5"
last_weights_path = "/kaggle/working/weights.last_auto4.hdf5"

# Saves a new file whenever validation accuracy improves
check_point = ModelCheckpoint(best_weights_path, monitor='val_accuracy',
                              verbose=1, save_best_only=True, mode='max')

# Overwrites the same file after every epoch
checkpoint_all = ModelCheckpoint(last_weights_path, monitor='val_accuracy',
                                 verbose=1, save_best_only=False, mode='max')

# all callbacks
callbacks_list = [check_point, learning_rate_reduction, checkpoint_all]

# fit model on augmented batches, validating on the held-out split
hist = model.fit_generator(data_gen.flow(trainX, trainY, batch_size=75),
                           epochs=35,
                           validation_data=(testX, testY),
                           callbacks=callbacks_list)

In [None]:
# Restore the weights written by the per-epoch checkpoint
model.load_weights(filepath="/kaggle/working/weights.last_auto4.hdf5")

In [None]:
# Report [loss, accuracy] on the training split, then on the held-out split
train_scores = model.evaluate(trainX, trainY)
print(train_scores)
test_scores = model.evaluate(testX, testY)
print(test_scores)

In [None]:
from sklearn.metrics import confusion_matrix
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize = False,
                          title = 'Confusion matrix',
                          cmap = plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map.

    Parameters
    ----------
    cm : array-like, shape (n_classes, n_classes)
        Confusion matrix with true labels on rows and predictions on columns.
    classes : sequence
        Tick labels, one per class, in matrix order.
    normalize : bool, default False
        If True, every row is divided by its sum so cells show the fraction
        of each true class. Rows that sum to zero stay zero.
    title : str
        Figure title.
    cmap : matplotlib colormap
        Colormap used for the heat map.
    """
    cm = np.asarray(cm)

    # Normalise BEFORE drawing so the heat map and the cell annotations show
    # the same numbers (previously the image used raw counts and the text
    # used normalised values).
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)

    fig = plt.figure(figsize=(10,10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is, fractions with two decimals
    cell_format = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'

    # Use white text on dark cells and black text on light ones
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], cell_format),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Class probabilities for the held-out split
pred_y = model.predict(testX)

# Collapse probabilities / one-hot rows to integer class ids
pred_y_classes = pred_y.argmax(axis=1)
true_y = testY.argmax(axis=1)

# Rows are true classes, columns are predicted classes
confusion_MTX = confusion_matrix(true_y, pred_y_classes)

# Render it with the species names as tick labels
plot_confusion_matrix(confusion_MTX, classes=le.classes_)

In [None]:
test_images_path = '/kaggle/input/plant-seedlings-classification/test/*.png'
# Sorted so the rows of the submission file come out in a stable order
# (glob() itself returns paths in arbitrary order).
test_images = sorted(glob(test_images_path))

In [None]:
test_img_array = []
test_id_array = []
num = len(test_images)

# Read every test image, resize it and remember its file name as the id.
for count, img in enumerate(test_images, start=1):
    print("Obtain images: " + str(count) + "/" + str(num), end='\r')

    raw_img = cv2.imread(img)
    if raw_img is None:
        # cv2.imread signals failure by returning None instead of raising
        raise IOError("Could not read image: " + str(img))

    # Last path component is the file name; independent of directory depth
    test_id_array.append(img.split('/')[-1])  # image id
    test_img_array.append(cv2.resize(raw_img, (scale_to, scale_to)))

test_imgs = np.asarray(test_img_array)  # Test images set

# Preview up to eight images; convert OpenCV's BGR order to RGB for matplotlib
fig, axes = plt.subplots(2, 4, figsize=(12, 6))
for ax, sample in zip(axes.ravel(), test_imgs[:8]):
    ax.imshow(cv2.cvtColor(sample, cv2.COLOR_BGR2RGB))
    ax.axis('off')

In [None]:
# Segmentation parameters do not change between images, so build them once.
lower_green = (25, 41, 50)    # lower HSV bound of the colour range kept as "plant"
upper_green = (74, 255, 255)  # upper HSV bound of that range
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))

cleaned_test_img = []
show_samples = True  # plot the intermediate stages for the first image only
for img in test_imgs:
    # Smooth the image before thresholding
    blur_img = cv2.GaussianBlur(img, (5, 5), 0)

    # Threshold in HSV space
    hsvImg = cv2.cvtColor(blur_img, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsvImg, lower_green, upper_green)
    # Morphological closing fills small holes inside the mask
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

    # Boolean mask used for indexing
    bMask = mask > 0

    # Copy only the masked pixels onto a black canvas
    cleaned = np.zeros_like(img, np.uint8)
    cleaned[bMask] = img[bMask]

    cleaned_test_img.append(cleaned)  # Append image without background

    # Show the pipeline stages once
    if show_samples:
        # BGR images are converted to RGB for matplotlib; the HSV array and
        # the masks are plotted as they are.
        stages = [
            ("Original", cv2.cvtColor(img, cv2.COLOR_BGR2RGB)),
            ("Blurred", cv2.cvtColor(blur_img, cv2.COLOR_BGR2RGB)),
            ("HSV (raw channels)", hsvImg),
            ("Mask", mask),
            ("Boolean mask", bMask),
            ("Background removed", cv2.cvtColor(cleaned, cv2.COLOR_BGR2RGB)),
        ]
        for position, (stage_title, stage_img) in enumerate(stages, start=1):
            plt.subplot(2, 3, position)
            plt.imshow(stage_img)
            plt.title(stage_title)
        show_samples = False

cleaned_test_img = np.asarray(cleaned_test_img)

In [None]:
# Show sample result
# Preview up to eight background-free test images.
# The arrays are in OpenCV's BGR order; convert to RGB for matplotlib.
fig, axes = plt.subplots(2, 4, figsize=(12, 6))
for ax, sample in zip(axes.ravel(), cleaned_test_img[:8]):
    ax.imshow(cv2.cvtColor(sample, cv2.COLOR_BGR2RGB))
    ax.axis('off')

In [None]:
# Scale pixel values from [0, 255] to [0, 1].
# The variable is overwritten in place, so the dtype check keeps this cell
# safe to re-run: once the data is floating point it is not divided again.
if cleaned_test_img.dtype == np.uint8:
    cleaned_test_img = cleaned_test_img.astype(np.float32) / 255

In [None]:
# Class probabilities for every test image
pred = model.predict(x=cleaned_test_img)

In [None]:
# Write result to file
predNum = np.argmax(pred, axis=1)
predStr = le.classes_[predNum]

res = {'file': test_id_array, 'species': predStr}
res = pd.DataFrame(res)


In [None]:
# Save the predictions without the DataFrame index column
res.to_csv(path_or_buf="/kaggle/working/result_v2.csv", index=False)

# Transfer Learning

In [None]:
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one
InteractiveShell.ast_node_interactivity = "all"

import matplotlib.pyplot as plt
import numpy as np
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Model
from keras.layers import Dropout, Flatten, Dense
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# Pre-trained VGG19 convolutional base. The input shape must match the images
# that are actually fed in (trainX / cleaned_test_img are scale_to x scale_to);
# the previous hard-coded (70, 70, 3) did not.
# A separate name is used so the CNN held in `model` is not overwritten.
vgg_base = applications.VGG19(weights="imagenet", include_top=False,
                              input_shape=(scale_to, scale_to, 3))

# Freeze the first five layers of the base
for layer in vgg_base.layers[:5]:
    layer.trainable = False

# New classification head on top of the convolutional features
val = vgg_base.output
val = Flatten()(val)
val = Dense(1024, activation="relu")(val)
val = Dropout(0.5)(val)
val = Dense(1024, activation="relu")(val)
val = Dropout(0.5)(val)
# One output per species found by the label encoder (was a hard-coded 12)
predictions = Dense(num_classes, activation="softmax")(val)

# `inputs` / `outputs` are the Keras 2 keyword names; the singular forms are
# the legacy Keras 1 spelling.
model_final = Model(inputs=vgg_base.input, outputs=predictions)

model_final.compile(loss="categorical_crossentropy",
                    optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
                    metrics=["accuracy"])
model_final.summary()

In [None]:
# Augmentation settings for the directory-based generator
augmentation_options = {
    "rotation_range": 360,
    "width_shift_range": 0.3,
    "height_shift_range": 0.3,
    "zoom_range": 0.3,
    "horizontal_flip": True,
    "vertical_flip": True,
}
images_generator = ImageDataGenerator(**augmentation_options)

In [None]:
# Stream augmented batches of 16 images, resized to 240x240, straight from the
# per-species sub-directories; labels are one-hot ("categorical").
train_data_dir = "/kaggle/input/plant-seedlings-classification/train/"
train_generator = images_generator.flow_from_directory(
    directory=train_data_dir,
    target_size=(240, 240),
    batch_size=16,
    class_mode="categorical",
)

In [None]:
# Presumably the total number of images the directory iterator found — confirm against the Keras version in use
train_generator.n

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# NOTE(review): this splits the batch iterator itself rather than arrays, and
# neither `train` nor `val` is referenced by the later cells visible here (the
# training cell uses trainX/testX). Confirm whether this cell is still needed.
train, val = train_test_split( train_generator, test_size=0.3, random_state=42)

In [None]:
# Validation data is passed to the training call, so model selection and early
# stopping track the validation loss. Monitoring the training loss would keep
# the most over-fitted model and would almost never trigger early stopping.
# The deprecated `period=1` argument is dropped; saving every epoch is the default.
checkpoint = ModelCheckpoint("/kaggle/working/vgg16_withvalid.h5", monitor='val_loss', verbose=1,
                             save_best_only=True, save_weights_only=False, mode='auto')
early = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')

In [None]:
batch_size = 16
# steps_per_epoch must be a whole number of batches; round up so the last,
# partially filled batch is still seen once per epoch.
STEP_SIZE_TRAIN = int(np.ceil(len(trainX) / batch_size))

# validation_steps is only meaningful when validation data comes from a
# generator; here it is a plain (x, y) tuple, so the argument is omitted.
# The stray `trainX, testX, trainY, testY` expression is removed: with
# ast_node_interactivity="all" it printed all four arrays.
model_final.fit_generator(
                    data_gen.flow(trainX, trainY, batch_size=batch_size),
                    validation_data=(testX, testY),
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    epochs=50,
                    shuffle=True,
                    callbacks=[checkpoint, early])

In [None]:
# Class probabilities from the fine-tuned network
pred = model_final.predict(x=cleaned_test_img)

# Most likely class per image, translated back to the species name
pred_num = pred.argmax(axis=1)
predStr = le.classes_[pred_num]

# Pair every test file id with its predicted species
res = pd.DataFrame({'file': test_id_array, 'species': predStr})

In [None]:
# Report [loss, accuracy] on the training split, then on the held-out split
train_scores = model_final.evaluate(trainX, trainY)
print(train_scores)
test_scores = model_final.evaluate(testX, testY)
print(test_scores)

In [None]:
# Save the transfer-learning predictions without the DataFrame index column
res.to_csv(path_or_buf="/kaggle/working/result_v2.1.csv", index=False)