# **Objectives**

The main objective of the competition is to develop machine learning-based models to accurately classify a given leaf image from the test dataset to a particular disease category, and to identify an individual disease from multiple disease symptoms on a single leaf image.

# **Resources**

Details and background information on the dataset and the Kaggle competition ‘Plant Pathology 2020 Challenge’ were published as a peer-reviewed research article. If you use the dataset for your project, please cite the following:

https://bsapubs.onlinelibrary.wiley.com/doi/10.1002/aps3.11390

# Submission Format
For every image in the test set, submission files should contain two columns: `image` and `labels`. `labels` should be a space-delimited list.

The file should contain a header and have the following format:

* image,labels
* 85f8cb619c66b863.jpg,healthy
* ad8770db05586b59.jpg,healthy
* c7b03e718489f3ca.jpg,healthy

### Prerequisites

In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import os
import PIL
import PIL.Image
import tensorflow as tf
from keras.utils import to_categorical
from keras.preprocessing import image
from tqdm import tqdm
!pip install tqdm
import csv
from PIL import Image
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dropout, Dense, Activation, Flatten, Conv2D, MaxPooling2D, BatchNormalization

### Reading data

In [None]:
# Dataset locations inside the Kaggle input directory.
DATA_DIR = '../input/plant-pathology-2021-fgvc8'
train_dir = f'{DATA_DIR}/train_images'
test_dir = f'{DATA_DIR}/test_images'

# Training metadata; later cells read its `image` and `labels` columns.
train = pd.read_csv(f'{DATA_DIR}/train.csv')
# `head` has to be called: the bare attribute only displays the bound
# method's repr instead of a preview of the first rows.
train.head()

In [None]:
# Keep only the two columns used downstream, then show how many rows carry
# each raw (still space-delimited) label string.
wanted_columns = ['image', 'labels']
train = pd.DataFrame(data=train, columns=wanted_columns)
train['labels'].value_counts()

In [None]:
# Turn every space-delimited label string into a list of label names so the
# generator can treat each image as carrying several labels.
train['labels'] = train['labels'].map(lambda raw_labels: raw_labels.split(' '))
train.head(10)

In [None]:
# Image generator used to load the dataset: rescales pixels and applies
# random augmentation, holding back part of the data for validation.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

augmentation_options = dict(
    rescale=1 / 255.,       # multiply every pixel value by 1/255
    rotation_range=10,      # random rotation
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.2,   # fraction reserved for the "validation" subset
)
datagen = ImageDataGenerator(**augmentation_options)




In [None]:
# Shared settings for the data generators and the model input layer.
HEIGHT, WIDTH = 384, 384  # image size fed to the network
SEED = 98                 # seed handed to the data generators
BATCH_SIZE = 64           # images per generated batch


In [None]:
# Arguments common to the training and validation generators. Both read the
# pre-resized 384px copies of the images, because loading the full-size
# originals would make training take far longer.
shared_flow_args = dict(
    dataframe=train,
    directory='../input/resized-plant2021/img_sz_384',
    x_col='image',
    y_col='labels',
    color_mode="rgb",
    target_size=(HEIGHT, WIDTH),
    class_mode="categorical",
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=SEED,
)

# The two generators differ only in which side of `validation_split` they
# draw from.
# NOTE(review): both come from the same augmenting `datagen`, so validation
# batches are augmented as well — confirm this is intended.
train_ds = datagen.flow_from_dataframe(subset="training", **shared_flow_args)
val_ds = datagen.flow_from_dataframe(subset="validation", **shared_flow_args)

In [None]:
# Pull one batch from the training generator, report the shape of its image
# array and preview the first image as a sanity check.
example = next(train_ds)
batch_images = example[0]
print(batch_images.shape)
plt.imshow(batch_images[0])
plt.show()

In [None]:
# CNN for multi-label leaf-disease classification: three convolutional
# stages followed by a small dense head with one output unit per label.
model = Sequential()

# Stage 1: one 32-filter convolution, then 3x3 pooling.
model.add(Conv2D(32, (3, 3), padding="same", activation='relu', input_shape=(HEIGHT, WIDTH, 3)))
model.add(BatchNormalization(axis=3))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))

# Stage 2: two 64-filter convolutions.
model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
model.add(BatchNormalization(axis=3))
model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
# Normalise over the channel axis (3) like every other conv layer here; the
# previous axis=1 normalised over image height instead.
model.add(BatchNormalization(axis=3))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Stage 3: two 128-filter convolutions.
model.add(Conv2D(128, (3, 3), padding="same", activation='relu'))
model.add(BatchNormalization(axis=3))
model.add(Conv2D(128, (3, 3), padding="same", activation='relu'))
model.add(BatchNormalization(axis=3))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Dense head.
model.add(Flatten())
model.add(Dense(64))
model.add(Activation("relu"))
model.add(Dropout(0.25))
model.add(BatchNormalization())
model.add(Dropout(0.25))
model.add(Dense(6))
# An image may carry several labels at once, so each of the 6 outputs must be
# an independent probability. Sigmoid provides that; softmax would force the
# outputs to compete and sum to 1, which does not fit multi-hot targets
# trained with binary cross-entropy.
# NOTE(review): the probability threshold applied at prediction time should be
# re-validated against sigmoid outputs.
model.add(Activation("sigmoid"))

model.summary()

In [None]:
# Configure training: Adam optimizer, per-label binary cross-entropy loss,
# accuracy reported as the metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01, decay=0.01 / 30)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Save the weights of the best epoch (lowest validation loss) during training.
# The path is built with os.path.join so it resolves to a `Models` directory
# on every platform; a hard-coded backslash only separates directories on
# Windows and becomes part of the file name elsewhere.
checkpoint_path = os.path.join('Models', 'model-x.h5')
# Make sure the target directory exists before the first save.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

checkpoint = ModelCheckpoint(
    checkpoint_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
callbacks = [checkpoint]

In [None]:
# Train for 25 epochs, validating after each one and checkpointing through
# `callbacks`. `cnn_model` holds the object returned by `fit`, whose `.history`
# dict is plotted in later cells.
# `batch_size` and `shuffle` are intentionally not passed: the generators
# already produce shuffled batches of BATCH_SIZE images, and Keras documents
# that these arguments do not apply to generator input.
cnn_model = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=25,
    verbose=1,
    callbacks=callbacks,
)

In [None]:
# Write the trained model to 'model.h5' in the working directory.
model.save(filepath='model.h5')

In [None]:
# Per-epoch metrics recorded during training.
model_history = cnn_model.history

# Training vs. validation accuracy, saved to 'accuracy' and displayed.
plt.figure()
for accuracy_key in ('accuracy', 'val_accuracy'):
    plt.plot(model_history[accuracy_key])
plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'validation'])
plt.savefig('accuracy')
plt.show()

In [None]:
# Training vs. validation loss per epoch, saved to 'loss' and displayed.
plt.figure()
for loss_key in ('loss', 'val_loss'):
    plt.plot(model_history[loss_key])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'validation'])
plt.savefig('loss')
plt.show()

In [None]:
# Load the sample submission; its `image` column lists the test files to
# predict on.
sample_submission_path = '/kaggle/input/plant-pathology-2021-fgvc8/sample_submission.csv'
submission = pd.read_csv(sample_submission_path)
submission.head()

In [None]:
# Test-time generator: only rescales pixels, no augmentation.
test_datagen = ImageDataGenerator(
    rescale=1. / 255
)
INPUT_SIZE = (HEIGHT, WIDTH, 3)
test_generator = test_datagen.flow_from_dataframe(
    submission,
    directory="../input/plant-pathology-2021-fgvc8/test_images",
    x_col='image',
    y_col=None,
    class_mode=None,
    target_size=INPUT_SIZE[:2],
    # flow_from_dataframe shuffles by default. Predictions are later written
    # back to `submission` row by row, so the generator must yield images in
    # the dataframe's own order or labels end up on the wrong images.
    shuffle=False,
)

In [None]:
# Score every test image with the trained model and show the raw outputs.
preds = model.predict(x=test_generator)
print(preds)

In [None]:
# Minimum score for a class to be included in an image's predicted labels.
PREDICTION_THRESHOLD = 0.23

# Convert the score matrix to plain lists. np.asarray keeps the cell
# re-runnable once `preds` is already a list.
preds = np.asarray(preds).tolist()

# For each image, collect the indices of all classes scoring at or above the
# threshold, falling back to the single best class when none qualifies.
indices = []
for pred in preds:
    # enumerate yields each score's real position. Looking positions up with
    # list.index() returns the first match, so tied scores would produce
    # duplicate or wrong indices.
    selected = [
        class_index
        for class_index, score in enumerate(pred)
        if score >= PREDICTION_THRESHOLD
    ]
    if not selected:
        selected = [int(np.argmax(pred))]
    indices.append(selected)

print(indices)

In [None]:
# Invert the generator's label-name -> class-index mapping so predicted
# indices can be translated back to label names.
labels = {class_index: name for name, class_index in train_ds.class_indices.items()}
print(labels)

# One space-delimited label string per test image, in prediction order.
# The loop variable is deliberately not called `image`, which would overwrite
# the `image` module imported from keras.preprocessing.
testlabels = [
    ' '.join(str(labels[class_index]) for class_index in predicted_indices)
    for predicted_indices in indices
]
print(testlabels)

In [None]:
# Replace the placeholder labels with the predicted label strings.
submission = submission.assign(labels=testlabels)
submission.head()

In [None]:
# Write the final predictions, without the index column, to 'submission.csv'.
submission.to_csv(path_or_buf='submission.csv', index=False)