In [None]:
#IMPORT REQUIRED LIBRARIES:

import numpy as np
import pandas as pd
import os
from re import search
import shutil
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import cv2
import seaborn as sns

from sklearn.preprocessing import MultiLabelBinarizer

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dropout, Dense, Activation, Flatten, Conv2D, MaxPooling2D, BatchNormalization

In [None]:
# IMAGE PATHS & DATAFRAME:
# Directories holding the competition images, and the CSV describing the
# training images (its 'image' and 'labels' columns are used further down).
train_dir = '../input/plant-pathology-2021-fgvc8/train_images'
test_dir = '../input/plant-pathology-2021-fgvc8/test_images'
train = pd.read_csv('../input/plant-pathology-2021-fgvc8/train.csv')
# head must be *called*: a bare `train.head` only displays the bound method's
# repr instead of the first rows of the frame.
train.head()

In [None]:
# Keep only the two columns the rest of the notebook reads, then show how
# many images carry each (still space-joined) label string.
KEPT_COLUMNS = ['image', 'labels']
train = pd.DataFrame(train, columns=KEPT_COLUMNS)
label_string_counts = train['labels'].value_counts()
label_string_counts

In [None]:
# Bar chart of how often each label string occurs in the training frame.
label_counts = train.labels.value_counts()

plt.figure(figsize=(35, 12))
plt.xticks(fontsize=30)
plt.yticks(fontsize=30)
labels = sns.barplot(x=label_counts.index, y=label_counts)
# Tilt the category names so the long label strings do not overlap.
for tick_label in labels.get_xticklabels():
    tick_label.set_rotation(45)

In [None]:
# Turn each space-separated label string into a list of individual labels.
# The isinstance guard keeps this cell re-runnable: once the column already
# holds lists, running the cell again passes them through unchanged instead of
# raising AttributeError ('list' object has no attribute 'split').
train['labels'] = train['labels'].apply(
    lambda s: s.split(' ') if isinstance(s, str) else s
)
train[:10]

In [None]:
# Use the Image Data Generator to import the images from the dataset.
# (ImageDataGenerator is already imported in the imports cell at the top.)

HEIGHT = 248
WIDTH = 248
SEED = 143
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2
# We are using the resized images otherwise it will take a lot of time to train
RESIZED_IMAGE_DIR = '../input/resized-plant2021/img_sz_256'

# Training generator: rescaling plus random augmentation.
datagen = ImageDataGenerator(
    rescale=1/255.,
    rotation_range=10,  # Performing Rotation
    width_shift_range=0.2,
    height_shift_range=0.2,
    brightness_range=[0.2, 1.0],
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=VALIDATION_SPLIT,
)

# Validation generator: rescaling ONLY. Drawing the validation subset from the
# augmenting generator would score the model on randomly distorted images and
# make val_loss (which drives checkpointing and early stopping) noisy.
# The split fraction must stay identical to the training generator's so that
# the "training" and "validation" subsets remain complementary.
val_datagen = ImageDataGenerator(
    rescale=1/255.,
    validation_split=VALIDATION_SPLIT,
)


def _flow_subset(generator, subset, shuffle):
    """Return a batch iterator over one subset of `train`.

    generator -- the ImageDataGenerator that preprocesses the images
    subset    -- "training" or "validation"
    shuffle   -- whether the rows are shuffled between epochs
    """
    return generator.flow_from_dataframe(
        train,
        directory=RESIZED_IMAGE_DIR,
        x_col='image',
        y_col='labels',
        subset=subset,
        color_mode="rgb",
        target_size=(HEIGHT, WIDTH),
        class_mode="categorical",
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        seed=SEED,
    )


train_ds = _flow_subset(datagen, "training", shuffle=True)
# Order is irrelevant for evaluation, so keep validation deterministic.
val_ds = _flow_subset(val_datagen, "validation", shuffle=False)

In [None]:
# Pull one batch from the training iterator and display its first image as a
# sanity check of the preprocessing.
example = next(train_ds)
batch_images = example[0]
print(batch_images.shape)
plt.imshow(batch_images[0])
plt.show()

In [None]:
# CNN for multi-label classification: three convolutional stages followed by a
# small dense head with one independent sigmoid output per label.

# Output width follows the classes discovered by the training iterator rather
# than a hard-coded count.
NUM_CLASSES = len(train_ds.class_indices)
# Images are channels-last (height, width, channels), so batch normalisation
# must always act on the last axis. One layer previously used axis=1, which
# normalised over image height instead of over feature channels.
CHANNEL_AXIS = -1

model = Sequential()

# Stage 1
model.add(Conv2D(32, (3, 3), padding="same", activation='relu', input_shape=(HEIGHT, WIDTH, 3)))
model.add(BatchNormalization(axis=CHANNEL_AXIS))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))

# Stage 2
model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
model.add(BatchNormalization(axis=CHANNEL_AXIS))
model.add(Conv2D(64, (3, 3), padding="same", activation='relu'))
model.add(BatchNormalization(axis=CHANNEL_AXIS))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Stage 3
model.add(Conv2D(128, (3, 3), padding="same", activation='relu'))
model.add(BatchNormalization(axis=CHANNEL_AXIS))
model.add(Conv2D(128, (3, 3), padding="same", activation='relu'))
model.add(BatchNormalization(axis=CHANNEL_AXIS))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Classification head
model.add(Flatten())
model.add(Dense(64))
model.add(Activation("relu"))
model.add(Dropout(0.25))
model.add(BatchNormalization())
model.add(Dropout(0.25))
model.add(Dense(NUM_CLASSES))
# Targets are multi-hot (an image can carry several labels) and the loss is
# binary cross-entropy, so every output needs its own sigmoid. A softmax would
# force the outputs to sum to 1 and make the labels compete with each other.
model.add(Activation("sigmoid"))

# Compile the Model
# Same learning-rate curve as the former `decay=0.01/30` optimizer argument,
# lr = 0.01 / (1 + (0.01 / 30) * step), expressed as a schedule because current
# Keras optimizers no longer accept the `decay` keyword.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.01,
    decay_steps=1,
    decay_rate=0.01 / 30,
)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
    loss='binary_crossentropy',
    metrics=['accuracy'])
model.summary()

In [None]:
# Training callbacks: keep the best weights on disk and stop once validation
# loss stops improving.

# Build the checkpoint path with os.path.join: the previous raw string used a
# Windows backslash separator, which on Linux (e.g. Kaggle) is not a directory
# separator and yields a single file literally named 'Models\CNN_Model.h5'.
CHECKPOINT_DIR = 'Models'
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

checkpoint = ModelCheckpoint(os.path.join(CHECKPOINT_DIR, 'CNN_Model.h5'),
                             monitor='val_loss',
                             mode='min',
                             save_best_only=True,
                             verbose=1)
earlystop = EarlyStopping(monitor='val_loss',
                          min_delta=0,
                          patience=10,
                          verbose=1,
                          restore_best_weights=True)

callbacks = [checkpoint, earlystop]

In [None]:
# Train the network. Note that `cnn_model` holds the History object returned
# by fit(), not the model itself.
# `batch_size` is intentionally not passed: the iterators already yield batches
# of BATCH_SIZE, and Keras documents that batch_size must not be specified for
# generator / Sequence inputs. `shuffle` is left at its default (True);
# per-epoch shuffling is configured on the iterators themselves.
cnn_model = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,
    verbose=1,
    callbacks=callbacks,
)

In [None]:
# Per-epoch metrics recorded by fit().
model_history = cnn_model.history

# Training vs. validation accuracy. The second curve comes from
# 'val_accuracy', i.e. the validation split, so it is labelled 'validation'
# (it was previously mislabelled 'test').
fig, ax = plt.subplots()
ax.plot(model_history['accuracy'])
ax.plot(model_history['val_accuracy'])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'validation'])
fig.savefig('accuracy')
plt.show()

In [None]:
# Training vs. validation loss. The second curve comes from 'val_loss', i.e.
# the validation split, so it is labelled 'validation' (it was previously
# mislabelled 'test').
fig, ax = plt.subplots()
ax.plot(model_history['loss'])
ax.plot(model_history['val_loss'])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'validation'])
fig.savefig('loss')
plt.show()

In [None]:
# Read the sample submission; its 'image' column lists the test files to
# predict and the frame is reused as the template for the final CSV.
SAMPLE_SUBMISSION_CSV = '/kaggle/input/plant-pathology-2021-fgvc8/sample_submission.csv'
submission = pd.read_csv(SAMPLE_SUBMISSION_CSV)
submission.head()

In [None]:
# Test-time pipeline: rescale only, no augmentation, no labels.
test_datagen = ImageDataGenerator(
    rescale=1./255
)
INPUT_SIZE = (HEIGHT, WIDTH, 3)
test_generator = test_datagen.flow_from_dataframe(
    submission,
    directory="../input/plant-pathology-2021-fgvc8/test_images",
    x_col='image',
    y_col=None,
    class_mode=None,
    target_size=INPUT_SIZE[:2],
    # flow_from_dataframe shuffles by default. Predictions are later written
    # back to `submission` row by row, so the iteration order MUST match the
    # dataframe order or labels end up attached to the wrong images.
    shuffle=False,
)

In [None]:
# Run the trained model over every test batch; the result holds one row of
# per-class scores for each image yielded by the generator.
preds = model.predict(x=test_generator)
print(preds)

In [None]:
# Convert per-class scores into lists of selected class indices.

# A class is selected when its score reaches this threshold.
THRESHOLD = 0.23

# np.asarray(...) makes the cell re-runnable: it accepts both the ndarray
# returned by predict() and the list left behind by an earlier run of this
# cell (a plain list has no .tolist()).
preds = np.asarray(preds).tolist()

indices = []
for pred in preds:
    # enumerate yields each position exactly once. The previous
    # pred.index(value) lookup returned the FIRST position holding that value,
    # so tied scores produced duplicated / wrong class indices.
    selected = [idx for idx, score in enumerate(pred) if score >= THRESHOLD]
    if not selected:
        # Nothing reached the threshold: fall back to the single best class so
        # every image receives at least one label.
        selected = [int(np.argmax(pred))]
    indices.append(selected)

print(indices)

In [None]:
# Invert the training iterator's {class name: index} mapping so predicted
# indices can be translated back into class names.
labels = {index: name for name, index in train_ds.class_indices.items()}
print(labels)

# One space-joined label string per test image, in the same order as `indices`.
testlabels = [
    ' '.join(str(labels[class_index]) for class_index in image_indices)
    for image_indices in indices
]

print(testlabels)

In [None]:
# Put the predicted label strings into the submission frame's 'labels' column
# and preview the result.
submission = submission.assign(labels=testlabels)
submission.head()

In [None]:
# Write the final predictions to the working directory, without the index column.
SUBMISSION_CSV = 'submission.csv'
submission.to_csv(SUBMISSION_CSV, index=False)