In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from imgaug import augmenters as iaa
import cv2
from PIL import Image
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer

import seaborn as sns
import warnings
    
# Silence every Python warning for the rest of the session and switch all
# subsequent matplotlib figures to the "fivethirtyeight" style sheet.
warnings.simplefilter("ignore")
plt.style.use("fivethirtyeight")

<img src = "https://empire-s3-production.bobvila.com/articles/wp-content/uploads/2012/09/uvm.edu-apple-tree.jpg">

# Problem Statement - Plant Pathology 2021 - FGVC8
## Identify the category of foliar diseases in apple trees
*Apples are one of the most important temperate fruit crops in the world. Foliar (leaf) diseases pose a major threat to the overall productivity and quality of apple orchards. The current process for disease diagnosis in apple orchards is based on manual scouting by humans, which is time-consuming and expensive.*

*Although computer vision-based models have shown promise for plant disease identification, there are some limitations that need to be addressed. Large variations in visual symptoms of a single disease across different apple cultivars, or new varieties that originated under cultivation, are major challenges for computer vision-based disease identification. These variations arise from differences in natural and image capturing environments, for example, leaf color and leaf morphology, the age of infected tissues, non-uniform image background, and different light illumination during imaging etc.*

*Plant Pathology 2020-FGVC7 challenge competition had a pilot dataset of 3,651 RGB images of foliar disease of apples. For Plant Pathology 2021-FGVC8, we have significantly increased the number of foliar disease images and added additional disease categories. This year's dataset contains approximately 23,000 high-quality RGB images of apple foliar diseases, including a large expert-annotated disease dataset. This dataset reflects real field scenarios by representing non-homogeneous backgrounds of leaf images taken at different maturity stages and at different times of day under different focal camera settings.*

<b>The main objective of the competition is to develop machine learning-based models to accurately classify a given leaf image from the test dataset to a particular disease category, and to identify an individual disease from multiple disease symptoms on a single leaf image.</b>

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential,Model
import tensorflow_addons as tfa
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from keras.layers import Activation, Dropout, Flatten, Dense, Input, Conv2D, MaxPooling2D, BatchNormalization, Concatenate, ReLU, LeakyReLU
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import NASNetLarge, ResNet101, DenseNet121,InceptionResNetV2,Xception,MobileNetV2
from tensorflow.keras.applications.resnet import preprocess_input
from tensorflow.keras.metrics import Precision, Recall
from tqdm.keras import TqdmCallback

from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, ModelCheckpoint

from keras.models import load_model

In [None]:
# Root folder of the competition dataset and the sub-folder with the
# full-resolution training images.
DATA = "../input/plant-pathology-2021-fgvc8"
PATH_TO_TRAIN = f"{DATA}/train_images/"

# Read the training label table and the sample submission from the dataset root.
data_train, submission = (
    pd.read_csv(os.path.join(DATA, csv_name))
    for csv_name in ('train.csv', 'sample_submission.csv')
)

In [None]:
# Peek at the first five rows of the training label table.
data_train.head(n=5)

In [None]:
# Turn each space-separated label string into a list of individual labels.
# The isinstance guard keeps this cell idempotent: if it is re-run after the
# column already holds lists, the values are left untouched instead of raising
# AttributeError ('list' object has no attribute 'split').
data_train['labels'] = data_train['labels'].apply(
    lambda labels: labels.split(' ') if isinstance(labels, str) else labels
)
data_train

In [None]:
images = data_train['image'].values

# Pick up to 9 *distinct* images. Sampling without replacement avoids showing
# the same file twice, and the min() guard keeps the cell working when fewer
# than 9 images are available.
n_samples = min(9, len(images))
random_images = list(np.random.choice(images, size=n_samples, replace=False))

# Location of the image dir
img_dir = DATA+'/train_images'

print('Display Random Images')

# Adjust the size of your images
plt.figure(figsize=(10,8))

# Lay the sampled images out on a 3x3 grid, one subplot per image
for i, image_name in enumerate(random_images):
    plt.subplot(3, 3, i + 1)
    img = plt.imread(os.path.join(img_dir, image_name))
    # cmap only affects single-channel images; matplotlib ignores it for RGB(A) data
    plt.imshow(img, cmap='gray')
    plt.axis('off')

# Adjust subplot parameters to give specified padding
plt.tight_layout()

In [None]:
# Seed TensorFlow's global RNG once and reuse the same integer for the
# generators below. tf.random.set_seed() returns None, so its result must not
# be passed as the `seed` argument (that silently leaves the generators unseeded).
SEED = 42
tf.random.set_seed(SEED)

# Training pipeline: scale pixel values by 1/255 and apply light geometric
# augmentation. The last 80% / first 20% split is controlled by validation_split.
data_gen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    brightness_range=None,
    shear_range=0.1,
    zoom_range=0.1,
    rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.2)

# Validation pipeline: same rescaling and same validation_split, but NO random
# augmentation, so validation metrics are measured on undistorted images.
# Keras takes the split by row position in the dataframe, so using the same
# validation_split on both generators yields complementary subsets.
validation_data_gen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2)

# Options shared by the training and validation iterators.
flow_options = dict(
    directory='../input/resized-plant2021/img_sz_256',
    x_col='image',
    y_col='labels',
    target_size=(256,256),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=16,
    seed=SEED,
)

train_generator = data_gen.flow_from_dataframe(
    data_train,
    subset='training',
    shuffle=True,
    **flow_options
    )

# No shuffling is needed for evaluation; a fixed order keeps runs comparable.
validation_generator = validation_data_gen.flow_from_dataframe(
    data_train,
    subset='validation',
    shuffle=False,
    **flow_options
    )

In [None]:
def show_history(history):
    """Draw training and validation curves for loss and F1 side by side.

    Args:
        history: object exposing ``epoch`` (x values) and a ``history`` dict
            containing the keys ``loss``, ``val_loss``, ``f1_score`` and
            ``val_f1_score``.

    Raises:
        KeyError: if any of the four expected keys is missing from
            ``history.history``.
    """
    figure, (loss_axis, f1_axis) = plt.subplots(1, 2, figsize=(15,5))

    # (axis, panel title, key in history.history, suffix used in legend labels)
    panels = (
        (loss_axis, 'loss', 'loss', 'loss'),
        (f1_axis, 'F1', 'f1_score', 'f1'),
    )
    for axis, title, key, tag in panels:
        axis.set_title(title)
        axis.plot(history.epoch, history.history[key], label="Train " + tag)
        axis.plot(history.epoch, history.history["val_" + key], label="Validation " + tag)
        axis.legend()


In [None]:
def _conv_relu_bn(x, filters, kernel_size):
    """Apply Conv2D (default padding) -> ReLU -> BatchNormalization to ``x``."""
    x = Conv2D(filters, kernel_size)(x)
    x = ReLU()(x)
    return BatchNormalization(axis=-1)(x)


def _pool_dropout(x, rate):
    """Apply 2x2 max pooling followed by dropout with the given rate."""
    x = MaxPooling2D(pool_size=(2, 2))(x)
    return Dropout(rate)(x)


def create_model(input_shape, num_classes=6, drop_rate=0.25):
    """Build the custom CNN used for leaf-disease classification.

    The network is: input batch-norm, three 3x3 conv blocks, a four-branch
    multi-kernel block (3x3, 5x5, 7x7, 1x1 convolutions concatenated on the
    channel axis), three further 3x3 conv blocks with pooling, a 1x1
    convolution, and a small dense head ending in a sigmoid layer.

    Args:
        input_shape: shape passed to ``Input`` (the notebook uses
            ``(256, 256, 3)``).
        num_classes: number of units in the final sigmoid layer. Defaults to
            6, the value that was previously hard-coded.
        drop_rate: dropout rate applied after every pooling stage. Defaults to
            0.25, the value that was previously hard-coded. The dropouts in
            the dense head keep their fixed rates (0.5 and 0.1).

    Returns:
        An uncompiled ``Model`` mapping the input tensor to ``num_classes``
        sigmoid outputs.
    """
    init = Input(input_shape)

    # Stem: normalise the raw input, then three conv blocks before the first pooling.
    x = BatchNormalization(axis=-1)(init)
    for filters in (8, 8, 16):
        x = _conv_relu_bn(x, filters, (3, 3))
    x = _pool_dropout(x, drop_rate)

    # Multi-kernel block: parallel 'same'-padded convolutions with different
    # kernel sizes over the same input, so the branches can be concatenated.
    branches = []
    for k in (3, 5, 7, 1):
        branch = Conv2D(16, (k, k), padding='same')(x)
        branches.append(ReLU()(branch))
    x = Concatenate()(branches)
    x = BatchNormalization(axis=-1)(x)
    x = _pool_dropout(x, drop_rate)

    # Progressively wider conv blocks, each followed by pooling and dropout.
    for filters in (32, 64, 128):
        x = _conv_relu_bn(x, filters, (3, 3))
        x = _pool_dropout(x, drop_rate)

    # 1x1 convolution (ReLU fused into the layer) to expand channels.
    x = Conv2D(256, (1, 1), activation='relu')(x)
    x = BatchNormalization(axis=-1)(x)
    x = _pool_dropout(x, drop_rate)

    # Dense head.
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(28)(x)
    x = ReLU()(x)
    x = BatchNormalization(axis=-1)(x)
    x = Dropout(0.1)(x)

    # Sigmoid (not softmax): every class gets its own independent score.
    x = Dense(num_classes)(x)
    x = Activation('sigmoid')(x)

    model = Model(init, x)

    return model

In [None]:
# Instantiate the network for a (256, 256, 3) input shape, then print its
# layer-by-layer summary.
model = create_model(input_shape=(256, 256, 3))
model.summary()

In [None]:
# Macro-averaged F1 over the 6 classes. threshold=0.5 binarises each sigmoid
# output independently; without a threshold the metric keeps only the argmax
# class per sample, which is wrong when an image can carry several labels.
f1 = tfa.metrics.F1Score(num_classes=6, average='macro', threshold=0.5)

# `monitor` must be the *name* of a logged quantity, not the metric object.
# 'val_f1_score' is the same key show_history() reads from the training history.
earlystop = EarlyStopping(
    monitor='val_f1_score',
    patience=10,
    mode='max',
    restore_best_weights=True)

# Halve the learning rate after 3 epochs without val_loss improvement.
# verbose takes an integer level; 1 prints a message on every reduction.
reducelrplateau = ReduceLROnPlateau(
    monitor='val_loss',
    patience=3,
    verbose=1,
    factor=0.5)

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(1e-3),
    metrics=[f1])

# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(
    train_generator,
    steps_per_epoch=100,
    validation_data=validation_generator,
    epochs=50,
    verbose=1,
    callbacks=[earlystop, reducelrplateau, TqdmCallback(verbose=0)])

In [None]:
# Plot the loss and F1 curves stored in the training history.
show_history(history=history)

# END
<img src = "https://media.giphy.com/media/9Ai5dIk8xvBm0/giphy.gif">