# IMPORTS

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import os

#import json
#import math
import cv2
import PIL
from PIL import Image
import numpy as np

from keras import layers
from keras.applications import DenseNet121
from keras.callbacks import Callback, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import scipy

import tensorflow as tf

from tqdm import tqdm

import gc

%matplotlib inline

# DATA VALUES

In [None]:
# When True, only the first 100 rows of the training table are loaded.
TESTING_PHASE = False

BATCH_SIZE = 15         # batch size handed to the augmenting generator
TRAIN_VAL_RATIO = 0.27  # passed as test_size to train_test_split
EPOCHS = 11             # NOTE(review): not referenced by the visible training cell, which hard-codes its epoch count
LR = 0.001              # default value of build_model's LR parameter
IMG_SIZE = 128          # images are resized to IMG_SIZE x IMG_SIZE pixels
SEED = 2020             # seed handed to the augmenting generator


# LOAD AND PREPROCESS DATA

In [None]:
# Read the training labels and the sample-submission template.
train_df = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')
test_df = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')

# Report the dimensions of both tables first, then preview their leading rows.
for frame_name, frame in (('train_df', train_df), ('test_df', test_df)):
    print(f'{frame_name} shape: ', frame.shape)
for frame in (train_df, test_df):
    print(frame.head())


In [None]:
# Run a full garbage-collection pass before the image-loading cells below.
gc.collect()

In [None]:
def create_mask_for_plant(image):
    """Build a binary mask of the pixels that fall inside a fixed HSV window.

    The input is converted with ``cv2.COLOR_BGR2HSV`` (i.e. it is treated as a
    BGR image), thresholded with ``cv2.inRange`` against the window
    H in [25, 95], S in [100, 255], V in [50, 255], and the result is cleaned
    up with a morphological closing using an 11x11 elliptical kernel.

    Parameters
    ----------
    image : array accepted by ``cv2.cvtColor`` as a BGR image.

    Returns
    -------
    The closed mask produced from ``cv2.inRange``.
    """
    hue_center, hue_tolerance = 60, 35
    hsv_floor = np.array([hue_center - hue_tolerance, 100, 50])
    hsv_ceiling = np.array([hue_center + hue_tolerance, 255, 255])

    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    in_window = cv2.inRange(hsv_image, hsv_floor, hsv_ceiling)

    # Closing (dilate then erode) fills small holes inside the selected regions.
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
    return cv2.morphologyEx(in_window, cv2.MORPH_CLOSE, closing_kernel)

def segment_plant(image):
    """Return a copy of ``image`` restricted to the mask from ``create_mask_for_plant``.

    The image is AND-ed with itself under the mask, so pixels selected by the
    mask keep their values and the remaining pixels are left at zero.
    """
    plant_mask = create_mask_for_plant(image)
    return cv2.bitwise_and(image, image, mask=plant_mask)

In [None]:
def preprocess_image(image_path, desired_size=IMG_SIZE):
    """Load an image, resize it to a square and keep only the masked plant pixels.

    Parameters
    ----------
    image_path : str
        Path of the image file to read with ``cv2.imread``.
    desired_size : int
        Side length, in pixels, of the square output image.

    Returns
    -------
    numpy.ndarray
        The segmented image of shape ``(desired_size, desired_size, 3)``,
        in the channel order produced by ``cv2.imread``.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` cannot load ``image_path`` (it signals a missing or
        unreadable file by returning ``None`` instead of raising).
    """
    casava = cv2.imread(image_path)
    if casava is None:
        # Fail with the offending path instead of a cryptic error inside cv2.resize.
        raise FileNotFoundError(
            f'cv2.imread could not load image (missing or unreadable file): {image_path}'
        )

    resized = cv2.resize(casava, (desired_size, desired_size), interpolation=cv2.INTER_AREA)
    # The segmented array already has the target size, so no further
    # resizing or PIL round-trip is needed.
    return segment_plant(resized)


In [None]:
# In testing mode only the first 100 rows of the training table are kept.
if TESTING_PHASE:
    train_df = train_df.head(100)

# Pre-allocate one uint8 slot of shape (IMG_SIZE, IMG_SIZE, 3) per training row.
N = len(train_df)
x_train = np.empty((N, IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)

# Preprocess every training image and write it into its slot.
for row, image_id in enumerate(tqdm(train_df['image_id'])):
    image_path = f'../input/cassava-leaf-disease-classification/train_images/{image_id}'
    x_train[row] = preprocess_image(image_path)
  

In [None]:
# Same loading procedure as for the training images, applied to the
# test/holdout set when its directory is present.
if not os.path.exists('../input/cassava-leaf-disease-classification/test_images'):
    print("error: no image directory/files")
else:
    N = len(test_df)
    x_test = np.empty((N, IMG_SIZE, IMG_SIZE, 3), dtype=np.uint8)
    for row, image_id in enumerate(tqdm(test_df['image_id'])):
        image_path = f'../input/cassava-leaf-disease-classification/test_images/{image_id}'
        x_test[row] = preprocess_image(image_path)


In [None]:
# one-hot encoding
# One-hot encode the labels against a fixed set of classes so the target always
# has NUM_CLASSES columns. pd.get_dummies on the raw column only creates columns
# for label values that are actually present, so a subset of rows (e.g. when
# TESTING_PHASE is on) could otherwise yield fewer columns than the model's
# output layer expects.
NUM_CLASSES = 5  # must match the width of the model's softmax output layer
label_categories = pd.Categorical(train_df['label'], categories=list(range(NUM_CLASSES)))
if label_categories.isna().any():
    # A label outside 0..NUM_CLASSES-1 would silently become an all-zero row.
    raise ValueError(f"train_df['label'] contains values outside 0..{NUM_CLASSES - 1}")
# An explicit float dtype keeps the targets independent of the pandas version's default.
y_train = pd.get_dummies(label_categories, dtype='float32').values

print(x_train.shape)
print(y_train.shape)

if os.path.exists('../input/cassava-leaf-disease-classification/test_images'):
    print(x_test.shape)
else:
    print("test images not found")

# SHOW ONE DATA IMAGE

In [None]:
# cv2.imread returns pixels in BGR channel order, while plt.imshow interprets a
# 3-channel array as RGB; convert before plotting so the colours are not swapped.
image = cv2.imread("../input/cassava-leaf-disease-classification/train_images/1001749118.jpg")
plt.figure(figsize=(16,10))
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.show()

# TRAIN

In [None]:
# Hold out a TRAIN_VAL_RATIO fraction of the samples for validation, using a
# fixed random_state so the split is repeatable.
split_options = dict(test_size=TRAIN_VAL_RATIO, random_state=2021)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, **split_options)

In [None]:
def create_datagen():
    """Create the ``ImageDataGenerator`` used to augment training batches.

    The generator is configured with a random zoom range of 0.15, random
    horizontal and vertical flips, and ``fill_mode='constant'`` with a fill
    value of 0.
    """
    augmentation_options = {
        'zoom_range': 0.15,        # range for random zoom
        'fill_mode': 'constant',
        'cval': 0.,                # value used for fill_mode = "constant"
        'horizontal_flip': True,   # randomly flip images
        'vertical_flip': True,     # randomly flip images
    }
    return ImageDataGenerator(**augmentation_options)

# Wrap the training arrays in the augmenting generator; batches of BATCH_SIZE
# are drawn with a fixed seed.
train_datagen = create_datagen()
data_generator = train_datagen.flow(x_train, y_train, batch_size=BATCH_SIZE, seed=SEED)

In [None]:
# densenet = DenseNet121(
#     weights='../input/densenet-keras/DenseNet-BC-121-32-no-top.h5',
#     include_top=False,
#     input_shape=(IMG_SIZE,IMG_SIZE,3)
# )

In [None]:
# def build_model(LR=LR):
#     model = Sequential()
#     model.add(densenet)
#     model.add(layers.GlobalAveragePooling2D())
#     model.add(layers.Dropout(0.80))
#     model.add(layers.Dense(5, activation='sigmoid'))
    
#     model.compile(
#         loss='binary_crossentropy',
#         optimizer=Adam(lr=LR),
#         metrics=['accuracy']
#     )
    
#     return model

In [None]:
# Exponential learning-rate decay. With staircase=True the exponent is an
# integer division, so the rate changes in discrete steps:
#   new rate = initial_learning_rate * decay_rate ** floor(step / decay_steps)
# i.e. the rate is halved every 800 optimizer steps.
initial_learning_rate = 0.001  # starting rate

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=800,
    decay_rate=0.5,
    staircase=True,
)

In [None]:
def build_model(LR=LR):
    """Build and compile the CNN classifier for 5 classes.

    The network stacks four convolution blocks (Conv2D -> [pooling] ->
    BatchNormalization -> LeakyReLU -> pooling) with L2 kernel regularization
    and dropout, followed by a small dense head ending in a 5-way softmax.
    It is compiled with categorical cross-entropy, the Adam optimizer and the
    accuracy metric.

    All layers and the optimizer are taken from ``tf.keras`` so the model is
    not assembled from a mix of the standalone ``keras`` package and
    ``tf.keras`` objects.

    Parameters
    ----------
    LR : float
        Kept for backward compatibility. NOTE(review): this value is currently
        not used -- the optimizer's learning rate comes from the module-level
        ``lr_schedule``.

    Returns
    -------
    The compiled ``tf.keras.Sequential`` model, expecting inputs of shape
    ``(IMG_SIZE, IMG_SIZE, 3)``.
    """
    L = tf.keras.layers
    l2 = tf.keras.regularizers.l2

    model = tf.keras.Sequential([
        # Block 1: 1x1 convolution applied directly to the input image.
        L.Conv2D(64, 1, activation=None, kernel_regularizer=l2(0.1), input_shape=(IMG_SIZE, IMG_SIZE, 3)),
        L.MaxPooling2D(),
        L.BatchNormalization(axis=3),
        L.LeakyReLU(0.1),
        L.MaxPool2D(strides=2),

        # Block 2
        L.Conv2D(128, 3, activation=None, padding="same", kernel_regularizer=l2(0.01)),
        L.MaxPooling2D(),
        L.BatchNormalization(axis=3),
        L.LeakyReLU(0.1),
        L.MaxPool2D(strides=2),

        L.Dropout(0.4),

        # Block 3 (no padding argument, so the Keras default applies)
        L.Conv2D(256, 5, activation=None, kernel_regularizer=l2(0.01)),
        L.BatchNormalization(axis=3),
        L.LeakyReLU(0.1),
        L.MaxPool2D(strides=2),

        # Block 4
        L.Conv2D(64, 5, activation=None, padding="same", kernel_regularizer=l2(0.01)),
        L.BatchNormalization(axis=3),
        L.LeakyReLU(0.1),
        L.MaxPool2D(strides=2),

        L.Dropout(0.4),

        # Dense head
        L.Flatten(),
        L.Dense(100, kernel_regularizer=l2(0.01), activation=None),
        L.BatchNormalization(axis=1),
        L.ReLU(),

        L.Dropout(0.4),
        L.Dense(50, kernel_regularizer=l2(0.01), activation=None),
        L.Dense(25, kernel_regularizer=l2(0.01), activation="relu"),
        L.BatchNormalization(axis=1),
        L.ReLU(),

        # Output: one probability per class.
        L.Dense(5, activation='softmax'),
    ])

    model.compile(
        loss='categorical_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        metrics=['accuracy']
    )

    return model

In [None]:
# Build the compiled model and print its layer-by-layer summary.
model = build_model()
model.summary()

In [None]:
# steps_per_epoch must be a whole number; round up so that the final, partial
# batch of each pass over the training data is included.
steps_per_epoch = int(np.ceil(x_train.shape[0] / BATCH_SIZE))

# Model.fit accepts generators directly; fit_generator is deprecated.
# NOTE(review): the epoch count is hard-coded here rather than taken from the
# EPOCHS constant -- confirm which value is intended.
history = model.fit(
    data_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=100,
    validation_data=(x_val, y_val)
)

In [None]:
# Tabulate the per-epoch metrics recorded by the last fit call, then draw one
# figure for the loss curves and one for the accuracy curves.
training_log = model.history.history
history_df = pd.DataFrame(training_log)
history_df.plot(y=['loss', 'val_loss'])
history_df.plot(y=['accuracy', 'val_accuracy'])

# PREDICT

In [None]:
# Predict per-class scores for the test images and keep the index of the
# highest-scoring class for each one.
class_scores = model.predict(x_test)
y_test = np.argmax(class_scores, axis=1)

# Attach the predictions and write only the two submission columns.
test_df = test_df.assign(label=y_test)[["image_id","label"]]
test_df.to_csv('submission.csv',index=False)