In [None]:
from __future__ import print_function
import numpy as np # For numerical fast numerical calculations
import matplotlib.pyplot as plt # For making plots
import pandas as pd 
import cv2
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt # plotting library
from keras.models import Sequential
from keras.layers import Dense , Activation, Dropout, Conv2D, Flatten, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.metrics import AUC, Accuracy
from tensorflow.keras.utils import to_categorical, plot_model
import os, datetime
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator, img_to_array
from imblearn.over_sampling import SMOTE
from keras.regularizers import l2
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.layers.normalization.batch_normalization import BatchNormalization
from tensorflow.keras import backend as K
from random import randint, seed
from datetime import datetime
# Reset the global state Keras has accumulated in this kernel (e.g. from models built earlier).
K.clear_session()

In [None]:
# Global hyper-parameters used by the cells below:
#   IMG_SIZE   - side length (pixels) every image is resized to
#   BATCH_SIZE - number of samples per batch
#   EPOCH      - number of epochs passed to model.fit
IMG_SIZE, BATCH_SIZE, EPOCH = 300, 24, 100

In [None]:
# Read the competition's CSV tables; each has an 'image_id' column naming a file
# (without extension) inside the images directory.
train_data, test_data = map(
    pd.read_csv,
    (
        '../input/plant-pathology-2020-fgvc7/train.csv',
        '../input/plant-pathology-2020-fgvc7/test.csv',
    ),
)
# sub = pd.read_csv('../input/plant-pathology-2020-fgvc7/sample_submission.csv')
# Directory prefix that is concatenated with "<image_id>.jpg" when images are loaded.
FILE_PATH = "../input/plant-pathology-2020-fgvc7/images/"

Load the training images, resize each one and convert it to RGB, and collect them in `training_images`.

In [None]:
# Load every training image from disk, resize it to IMG_SIZE x IMG_SIZE and
# convert it to RGB channel order.
train_ids = train_data['image_id']
# The progress bar is sized from the data rather than a hard-coded image count.
prog = tf.keras.utils.Progbar(len(train_ids), width=100, verbose=1)
training_images = []
for index, image_id in enumerate(train_ids):
    image_path = FILE_PATH + image_id + ".jpg"
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # instead of raising; fail here with the offending path.
        raise FileNotFoundError("could not read image: " + image_path)
    image = cv2.resize(image, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_AREA)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV decodes images as BGR
    training_images.append(image)
    prog.update(index + 1)

In [None]:
# Stack the loaded images into a single float32 array of shape
# (n_images, IMG_SIZE, IMG_SIZE, 3). One vectorised conversion replaces the
# per-image loop, which wrote every slot twice (the img_to_array result was
# immediately overwritten by the raw image).
x_train = np.asarray(training_images, dtype=np.float32).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
# Scale the 8-bit pixel values into [0, 1].
x_train = x_train / 255
print(x_train.shape)

Collect the four class columns into a one-hot-encoded label array.

In [None]:
# Target matrix with one column per class. The column order here is the class
# order used for the model outputs when the submission file is written.
label_columns = ['healthy', 'multiple_diseases', 'rust', 'scab']
labels = train_data.loc[:, label_columns]
y_train = labels.to_numpy(copy=True)
print(y_train.shape)

plot and visualize the images

In [None]:
# Show five randomly chosen training images, titled with their index and label vector.
fig, axis = plt.subplots(1, 5, figsize=(20, 20))
seed(23)  # fixed seed so the same images are drawn on every run
for i in range(5):
    axis[i].set_axis_off()
    # random.randint includes BOTH end points, so the upper bound has to be the
    # last valid index; it is derived from the array instead of being hard-coded.
    rand_num = randint(0, len(x_train) - 1)
    axis[i].imshow(x_train[rand_num])
    title = "image:{} and class: {}".format(rand_num, y_train[rand_num])
    axis[i].set_title(title)

Split the dataset into an 80% training set and a 20% validation set.

In [None]:
# Hold out 20% of the samples as x_test / y_test; the fixed random_state keeps
# the split repeatable.
# NOTE: x_train / y_train are rebound here, so running this cell a second time
# splits the already-reduced training data again.
x_train, x_test, y_train, y_test = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=42,
)
print("x_train:", x_train.shape, " y_train:", y_train.shape, " x_test:", x_test.shape, " y_test:", y_test.shape)
# Column-wise sum of the one-hot labels = number of training samples per class.
class_sum = y_train.sum(axis=0)
print(class_sum)

Oversample the minority class with SMOTE to balance the dataset. First reshape the images into a 2D array; once the SMOTE resampling is done, reshape the images back to their original shape.

In [None]:
# Balance the training split by synthesising extra minority-class samples with SMOTE.
# fit_resample expects 2-D (n_samples, n_features) input, so every image is
# flattened first and restored to (IMG_SIZE, IMG_SIZE, 3) afterwards.
# random_state is fixed so the synthetic samples are the same on every run.
oversample = SMOTE(sampling_strategy='minority', k_neighbors=7, random_state=42)
x_train, y_train = oversample.fit_resample(x_train.reshape((-1, IMG_SIZE * IMG_SIZE * 3)), y_train)
x_train = x_train.reshape((-1, IMG_SIZE, IMG_SIZE, 3))
# x_test / y_test are intentionally NOT resampled: the hold-out split has to
# contain only real images, otherwise the reported metrics are partly measured
# on interpolated (synthetic) samples.

Check the shapes of `x_train` and `y_train`. Additionally, sum the one-hot labels per class to see how many images each class contains.

In [None]:
# Summarise both splits: array shapes, then the per-class image counts
# (column-wise sums of the one-hot label matrices).
train_counts = y_train.sum(axis=0)
test_counts = y_test.sum(axis=0)

print("after smote: x_train:", x_train.shape, " y_train:", y_train.shape)
print("number of images in each class for training:", train_counts)

print("after smote: x_test:", x_test.shape, " y_test:", y_test.shape)
print("number of images in each class for validation:", test_counts)

Plot images from the multiple-diseases class to see how the SMOTE oversampling performed.

In [None]:
# Show up to five training images whose label has column 1 ('multiple_diseases') set.
fig, axis = plt.subplots(1, 5, figsize=(20, 20))
# Pick directly among the matching indices. The previous rejection loop drew
# randint(0, n) (inclusive upper bound -> possible IndexError) and would spin
# forever if no sample of the class existed.
multi_idx = np.flatnonzero(y_train[:, 1] == 1)
chosen = np.random.choice(multi_idx, size=min(5, multi_idx.size), replace=False)
for counter in range(5):
    axis[counter].set_axis_off()
    if counter >= chosen.size:
        continue  # fewer than five matching images: leave the remaining panels blank
    rand_num = chosen[counter]
    axis[counter].imshow(x_train[rand_num])
    title = "image:{} and class: {}".format(rand_num, y_train[rand_num])
    axis[counter].set_title(title)

create the CNN model

In [None]:
reg_lambda = 0.005  # L2 penalty factor applied to every regularised kernel below


def _conv_bn(n_filters, **conv_kwargs):
    """Return [Conv2D, BatchNormalization]: a 3x3 ReLU convolution with an L2 kernel
    penalty, followed by batch normalisation. Extra keyword arguments go to Conv2D."""
    return [
        Conv2D(n_filters, kernel_size=(3,3), activation='relu',
               kernel_regularizer=l2(reg_lambda), **conv_kwargs),
        BatchNormalization(),
    ]


# Filter counts of the convolutions in each of the five stages. Every stage is
# closed by 2x2 'SAME' max-pooling and 20% dropout.
conv_stages = [(32, 64), (128,), (512, 128), (128, 64), (128, 512)]

model = Sequential()
first_conv = True
for stage_filters in conv_stages:
    for n_filters in stage_filters:
        # Only the very first layer declares the network's input shape.
        extra = {'input_shape': (IMG_SIZE, IMG_SIZE, 3)} if first_conv else {}
        first_conv = False
        for layer in _conv_bn(n_filters, **extra):
            model.add(layer)
    model.add(MaxPooling2D(pool_size=(2,2), padding='SAME'))
    model.add(Dropout(0.2))

# Classifier head: three regularised dense blocks (300 -> 200 -> 100 units), each
# with batch normalisation and 30% dropout, then a 4-way softmax output.
model.add(Flatten())
for n_units in (300, 200, 100):
    model.add(Dense(n_units, activation='relu', kernel_regularizer=l2(reg_lambda)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
model.add(Dense(4, activation='softmax'))

model.summary()

define the callbacks, metrics and compile model

In [None]:
# Halve the learning rate when validation accuracy has not improved for 10 epochs.
lr_reduce = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    patience = 10, 
    verbose = 1,
    min_delta = 0.000001,
    min_lr=0,
    factor = 0.5, 
    mode = 'auto'
)
# Stop after 20 epochs without a better validation loss and roll back to the best weights.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss', 
    patience = 20,
    mode='auto',
    verbose=1, 
    restore_best_weights =True
)
adam = Adam(learning_rate=0.001)
rmsprop = tf.keras.optimizers.RMSprop(learning_rate = 0.001)  # alternative optimizer; not passed to compile below
loss = 'categorical_crossentropy'
# The AUC metric is named explicitly: the history plots read the 'auc' / 'val_auc'
# keys, and an auto-generated name becomes 'auc_1', 'auc_2', ... whenever this
# cell is executed again in the same session.
metrics = [
    tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
    tf.keras.metrics.AUC(curve='ROC', name='auc'),
]

model.compile(optimizer=adam, loss=loss, metrics=metrics)

In [None]:
# Random augmentation applied to training batches.
# NOTE(review): the arrays fed to this generator were already divided by 255 when
# they were built. Whether `rescale` here scales them a second time depends on
# how the installed Keras version re-ranges pixels inside its brightness shift --
# inspect the min/max of one generated batch to confirm the inputs end up in [0, 1].
train_datagen = ImageDataGenerator(
    rotation_range=45, 
    shear_range=.25,
    zoom_range = 0.20, 
    width_shift_range= 0.25,
    rescale = 1/255,
    height_shift_range= 0.25, 
    brightness_range=[.5,1.5],
    horizontal_flip=True, 
    vertical_flip=True,
    fill_mode = 'nearest'
)
# Validation data must reach the model exactly as inference data does, so this
# generator is a plain pass-through. The previous configuration would have
#   * rescaled arrays that are already in [0, 1] by another 1/255,
#   * sampled zoom factors from [1 - 10, 1 + 10] = [-9, 11] (zoom_range=10), and
#   * randomly rotated/flipped the images, making validation metrics noisy.
validation_datagen = ImageDataGenerator()

In [None]:
start_time = datetime.now().time()  # wall-clock time at which training starts
# Train on augmented batches, but validate on the untouched hold-out arrays.
# Validation previously went through train_datagen as well, so ReduceLROnPlateau
# and EarlyStopping were monitoring randomly augmented images instead of the data
# that model.predict(x_test) is evaluated on afterwards. Passing the arrays
# directly also evaluates the whole hold-out split every epoch rather than a
# floor-divided number of shuffled batches.
history = model.fit(
    train_datagen.flow(x_train, y_train, batch_size=BATCH_SIZE),
    epochs=EPOCH,
    steps_per_epoch=x_train.shape[0] // BATCH_SIZE,
    validation_data=(x_test, y_test),
    validation_batch_size=BATCH_SIZE,
    callbacks=[lr_reduce, early_stop],
    verbose=1,
)


In [None]:
# Print the recorded start time followed by the current time, one per line.
print(start_time, datetime.now().time(), sep="\n")

In [None]:
from sklearn.metrics import roc_auc_score

# ROC AUC of each of the four label columns on the hold-out split, printed one per
# line, followed by their unweighted mean.
pred_test = model.predict(x_test)
roc_sum = 0
for true_column, predicted_column in zip(y_test.T[:4], pred_test.T[:4]):
    score = roc_auc_score(true_column, predicted_column)
    roc_sum += score
    print(f'{score:.3f}')

roc_sum /= 4
print(f'totally:{roc_sum:.3f}')

In [None]:
h = history.history

offset = 5  # the loss curves skip the first `offset` epochs
epochs = range(offset, len(h['loss']))


def _curve_panel(position, y_label, curves):
    """Select subplot `position` of the current figure and draw the given curves.

    `curves` is a sequence of (x_values, y_values, legend_label); when x_values is
    None the curve is plotted against its own index.
    """
    plt.subplot(position)
    plt.xlabel('epochs')
    plt.ylabel(y_label)
    for x_values, y_values, legend_label in curves:
        if x_values is None:
            plt.plot(y_values, label=legend_label)
        else:
            plt.plot(x_values, y_values, label=legend_label)
    plt.legend()


# Figure 1: loss (from epoch `offset` on) next to accuracy (all epochs).
plt.figure(1, figsize=(20, 6))
_curve_panel(121, 'loss', [
    (epochs, h['loss'][offset:], 'train'),
    (epochs, h['val_loss'][offset:], 'val'),
])
_curve_panel(122, 'accuracy', [
    (None, h['accuracy'], 'train'),
    (None, h['val_accuracy'], 'val'),
])
plt.show()


# Figure 2: ROC AUC over all epochs, drawn in the left half of the figure.
plt.figure(1, figsize=(20, 8))
_curve_panel(121, 'auc', [
    (None, h['auc'], 'train'),
    (None, h['val_auc'], 'val'),
])
plt.show()

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

# Confusion matrix on the hold-out split: the class of a sample is the position
# of the largest entry in its (one-hot / predicted-probability) row.
class_labels = ["Healthy", "Multi", "Rust", "Scab"]

true_classes = np.argmax(np.asarray(y_test), axis=1)
predicted_classes = np.argmax(np.asarray(pred_test), axis=1)

cm = confusion_matrix(true_classes, predicted_classes)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)

disp.plot(cmap=plt.cm.Blues)
plt.show()

In [None]:
# Write the trained model to disk under a fixed file name (.h5 extension).
model.save(filepath="cnn_apr_12_adam_lr_0.001.h5")

In [None]:
# Load the competition test images with the SAME preprocessing as the training
# images: INTER_AREA resize to IMG_SIZE x IMG_SIZE and BGR -> RGB conversion.
# Without the colour conversion the model would receive channel-swapped inputs
# at inference time.
test_image_ids = test_data['image_id']
# The progress bar is sized from the data rather than a hard-coded image count.
prog = tf.keras.utils.Progbar(len(test_image_ids), width=100, verbose=1)
testing_images = []
for index, image_id in enumerate(test_image_ids):
    image_path = FILE_PATH + image_id + ".jpg"
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # instead of raising; fail here with the offending path.
        raise FileNotFoundError("could not read image: " + image_path)
    image = cv2.resize(image, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_AREA)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV decodes images as BGR
    testing_images.append(image)
    prog.update(index + 1)

In [None]:
# Stack the loaded test images into a single float32 array of shape
# (n_images, IMG_SIZE, IMG_SIZE, 3). One vectorised conversion replaces the
# per-image loop, which wrote every slot twice (the img_to_array result was
# immediately overwritten by the raw image).
X_test = np.asarray(testing_images, dtype=np.float32).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
# Scale the 8-bit pixel values into [0, 1].
X_test = X_test / 255
print(X_test.shape)

In [None]:
# Predict class probabilities for the test images and write them to CSV, one
# column per class in the same order as the training label columns.
test_ids = test_data['image_id']
pred = model.predict(X_test)
res = pd.DataFrame({
    'image_id': test_ids,
    'healthy': pred[:, 0],
    'multiple_diseases': pred[:, 1],
    'rust': pred[:, 2],
    'scab': pred[:, 3],
})
res.to_csv('apr_12_ite_1_sub_sam_lr_0005.csv', index=False)
res.head(10)