In [None]:
import pandas as pd
import numpy as np
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# Load the competition's training index. Later cells read its `image`
# (file name) and `labels` (whitespace-separated tag string) columns.
train = pd.read_csv('../input/plant-pathology-2021-fgvc8/train.csv')
train.head()

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

# Split every row's `labels` string on whitespace into its individual tags,
# then multi-hot encode them: one 0/1 column per distinct tag.
label_lists = train['labels'].apply(lambda tags: tags.split())
mlb = MultiLabelBinarizer().fit(label_lists)
labels = pd.DataFrame(mlb.transform(label_lists), columns=mlb.classes_)

# Prepend the image file name so column 0 is `image` and every remaining
# column is a target; downstream cells rely on that layout via columns[1:].
labels = pd.concat([train['image'], labels], axis=1)
labels.head()

In [None]:
from keras.preprocessing.image import ImageDataGenerator
# One generator serves both training and validation: pixels are scaled by
# 1/255 and 20% of the rows are reserved for the 'validation' subset.
# NOTE(review): tf.keras EfficientNet models are documented to contain their
# own input Rescaling layer and to expect pixels in [0, 255] -- confirm that
# this extra 1/255 rescale is intended. Any change here must be mirrored in
# the generator used at prediction time.
img_gen = ImageDataGenerator(rescale=1.0 / 255, validation_split=0.2)

In [None]:
batch_size = 16
IMG_W, IMG_H = 380, 380
# NOTE: Keras documents target_size as (height, width); the (W, H) order used
# here is harmless only because both sides are 380.
target_size = (IMG_W, IMG_H)
# Alternative source (full-size originals):
# image_path = '/kaggle/input/plant-pathology-2021-fgvc8/train_images'
image_path = '/kaggle/input/resized-plant2021/img_sz_384'

# Settings common to the training and validation iterators. `class_mode="raw"`
# passes the multi-hot target columns (everything after `image`) through as-is.
flow_kwargs = dict(
    directory=image_path,
    x_col='image',
    y_col=labels.columns.tolist()[1:],
    class_mode="raw",
    color_mode="rgb",
    target_size=target_size,
    batch_size=batch_size,
    shuffle=True,
)

# Both iterators read the same frame; `subset` selects which side of the
# generator's validation_split they draw from.
img_iter = img_gen.flow_from_dataframe(labels, subset='training', **flow_kwargs)

img_iter_val = img_gen.flow_from_dataframe(labels, subset='validation', **flow_kwargs)

In [None]:
from tensorflow.keras.applications import EfficientNetB4
from keras.layers import Dense, Dropout, GlobalAveragePooling2D, BatchNormalization
from keras import Model

# Backbone: EfficientNetB4 without its classification top, initialised from a
# local weights file.
# NOTE: input_shape is (height, width, channels); the W/H order is harmless
# only because both are equal.
base_model = EfficientNetB4(
    weights='../input/keras-pretrained-models/EfficientNetB4_NoTop_ImageNet.h5',
    include_top=False,
    input_shape=(IMG_W, IMG_H, 3),
)

# Classification head, applied in order on top of the backbone's feature map.
head_layers = [
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
]
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)

# One independent sigmoid unit per target column (every column except `image`),
# trained with binary cross-entropy for multi-label output.
n_classes = len(labels.columns) - 1
predictions = Dense(n_classes, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam', loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Write the model to ./pp2021.h5 only when validation loss reaches a new minimum.
model_checkpoint = ModelCheckpoint(
    filepath="./pp2021.h5",
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

# Stop after 10 epochs without any val_loss improvement and, in that case,
# roll the model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# First training run: up to 20 epochs with both callbacks attached.
training_callbacks = [model_checkpoint, early_stopping]
history = model.fit(
    img_iter,
    epochs=20,
    validation_data=img_iter_val,
    callbacks=training_callbacks,
)

In [None]:
import keras
def load_best_model(path="./pp2021.h5"):
    """Load a saved Keras model from disk.

    Parameters
    ----------
    path : str, optional
        Location of the saved model. Defaults to "./pp2021.h5", the file the
        ModelCheckpoint callback in this notebook writes to.

    Returns
    -------
    The model object returned by `keras.models.load_model`.
    """
    return keras.models.load_model(path)

In [None]:
# Replace the in-memory model with the checkpoint on disk ("./pp2021.h5"),
# which ModelCheckpoint only writes when val_loss improves.
model = load_best_model()

In [None]:
import matplotlib.pyplot as plt

def plotHistory(result, metric='accuracy'):
    """Plot a per-epoch training metric together with its validation curve.

    Parameters
    ----------
    result : object
        Anything exposing a `.history` dict of per-epoch lists, such as the
        value returned by `model.fit`.
    metric : str, optional
        Key to plot from `result.history`. The validation curve is read from
        `'val_' + metric` and is skipped when that key is absent.
    """
    fig, ax = plt.subplots()

    ax.plot(result.history[metric])
    legend_entries = ['train']

    # The second curve comes from the validation data passed to fit(), so it
    # is labelled as such rather than as a held-out test set.
    val_key = 'val_' + metric
    if val_key in result.history:
        ax.plot(result.history[val_key])
        legend_entries.append('validation')

    ax.set_title('Model ' + metric)
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    ax.legend(legend_entries, loc='upper left')
    plt.show()

In [None]:
# Accuracy curves for the most recent fit() call stored in `history`.
plotHistory(history)

In [None]:
# Fine-tuning pass at a much lower learning rate.
#
# `model` was replaced by the checkpoint reloaded from disk, so the flag is set
# on `model` itself; `base_model` still refers to the layers of the earlier,
# pre-reload model object and toggling it would not affect what is trained here.
model.trainable = True

# The output layer already applies a sigmoid, so the loss must treat the
# predictions as probabilities (from_logits=False). With from_logits=True the
# sigmoid is effectively applied twice, and the reported val_loss is no longer
# comparable with the first run's -- which matters because the same
# checkpoint / early-stopping callbacks monitor val_loss.
model.compile(
    optimizer=keras.optimizers.Adam(1e-5),  # Low learning rate
    loss=keras.losses.BinaryCrossentropy(from_logits=False),
    metrics=["accuracy"],
)

epochs = 10
history = model.fit(
    img_iter,
    validation_data=img_iter_val,
    epochs=epochs,
    callbacks=[model_checkpoint, early_stopping]
)

In [None]:
# `history` now holds the fine-tuning run, so this plots that run's curves.
plotHistory(history)

In [None]:
# Reload the checkpoint currently on disk ("./pp2021.h5"); the predictions
# below are made with this reloaded model, not the last-epoch weights.
model = load_best_model()

In [None]:
# The sample submission doubles as the test index: its `image` column lists the
# files to predict, and its `labels` column is overwritten further down.
test_df = pd.read_csv('../input/plant-pathology-2021-fgvc8/sample_submission.csv')
# Directory holding the test images named in test_df['image'].
test_path = "../input/plant-pathology-2021-fgvc8/test_images"

In [None]:
# Test-time generator: applies the same 1/255 pixel scaling as the training
# generator and nothing else, so inputs match what the model was trained on.
image_test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Inference-only iterator over the test images.
# class_mode=None makes the iterator yield image batches without targets, which
# is the documented mode for model.predict(). The `labels` column of the sample
# submission is only a placeholder and must not be one-hot encoded as if it
# were ground truth (the default "categorical" mode would do that).
# shuffle=False keeps predictions aligned with the row order of test_df.
test_generator = image_test_datagen.flow_from_dataframe(
    test_df,
    directory=test_path,
    x_col="image",
    class_mode=None,
    target_size=target_size,
    color_mode="rgb",
    batch_size=1,
    shuffle=False,
)

In [None]:
# Score every test image. The generator was built with shuffle=False, so rows
# of `predicts` follow the generator's (unshuffled) order; the next cell treats
# each column as the score for the matching entry of labels.columns[1:].
predicts = model.predict(test_generator)

In [None]:
# Class names, in the same order as the target columns the model was trained on.
label = labels.columns.tolist()[1:]

# A class is predicted when its score exceeds 0.5.
verdict = (predicts > 0.50)

# If no class clears the threshold the row would be submitted with an empty
# label string. Every training image carries at least one label, so fall back
# to the single highest-scoring class for those rows instead.
unlabelled = np.flatnonzero(~verdict.any(axis=1))
verdict[unlabelled, predicts[unlabelled].argmax(axis=1)] = True

# One space-separated label string per test image.
answer = [
    ' '.join(name for name, hit in zip(label, row) if hit)
    for row in verdict
]

In [None]:
# Overwrite the `labels` column read from sample_submission.csv with the
# predicted label strings (one entry per row), then display the frame.
test_df['labels'] = np.array(answer)
test_df

In [None]:
# Write the submission file; index=False keeps the pandas row index out of the CSV.
test_df.to_csv('submission.csv', index=False)