In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Locations of the competition files: the CSV manifest is read relative to
# load_dir, the image files are looked up under train_image_dir.
load_dir = "../input/plant-pathology-2021-fgvc8/"
train_image_dir = "/kaggle/input/plant-pathology-2021-fgvc8/train_images/"

# Raw training manifest as shipped in train.csv.
train = pd.read_csv(load_dir + "train.csv")

# Working frame with just the "image" and "labels" columns; each labels
# string is split on single spaces into a list of label names.
train_df = pd.DataFrame(train, columns = ["image", "labels"])
train_df["labels"] = train["labels"].map(lambda label_string: label_string.split(' '))

In [None]:
# Preview the first rows of the image/labels frame built above.
train_df.head()

In [None]:
from PIL import Image

def visualizeImage(idx):
    """Display one training image and print its labels.

    Parameters
    ----------
    idx : int
        Positional row index into ``train_df`` (passed to ``.iloc``).

    The image file is resolved under ``train_image_dir`` and shown on a
    15x7 inch figure with the grid disabled; the row's ``labels`` value is
    printed afterwards.
    """
    row = train_df.iloc[idx]
    # Use the shared directory constant rather than repeating the path, and
    # close the file handle once matplotlib has read the pixels.
    with Image.open(train_image_dir + row["image"]) as img:
        fig, ax = plt.subplots(figsize = (15, 7))
        ax.imshow(img)
    ax.grid(False)
    plt.show()
    # Look the labels up by column name; integer keys on a label-indexed
    # Series are deprecated as positional access.
    print(f"labels: {row['labels']}")

visualizeImage(1)

In [None]:
# Preprocessing the data
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

imageSize = 227
batchSize = 16
NUM_CLASSES = 6
VALIDATION_SPLIT = 0.2
GENERATOR_SEED = 672502037

# Training pipeline: ResNet50 preprocessing plus random shear/zoom/flip.
# No `rescale` here: ImageDataGenerator applies `rescale` *after*
# `preprocessing_function`, so dividing by 255 would shrink the already
# mean-centred values and no longer match what the ImageNet weights expect.
trainGen = ImageDataGenerator(preprocessing_function = tf.keras.applications.resnet50.preprocess_input,
                              shear_range = 0.2,
                              zoom_range = 0.2,
                              horizontal_flip = True,
                              validation_split = VALIDATION_SPLIT)

# Validation pipeline: same preprocessing and same split fraction, but no
# random augmentation so validation metrics are measured on unmodified images.
validGen = ImageDataGenerator(preprocessing_function = tf.keras.applications.resnet50.preprocess_input,
                              validation_split = VALIDATION_SPLIT)


def _flow_subset(generator, subset):
    """Return a multi-label batch iterator over one subset of ``train_df``.

    ``generator`` is the ImageDataGenerator to draw from and ``subset`` is
    either "training" or "validation". Both subsets share the dataframe,
    image directory, target size, batch size and seed.
    """
    return generator.flow_from_dataframe(dataframe = train_df,
                                         directory = train_image_dir,
                                         x_col = "image",
                                         y_col = "labels",
                                         class_mode = "categorical",
                                         subset = subset,
                                         target_size = (imageSize, imageSize),
                                         batch_size = batchSize,
                                         seed = GENERATOR_SEED)


# The subsets are taken from the same dataframe with the same split fraction,
# so the training and validation rows do not overlap.
train_generator = _flow_subset(trainGen, "training")
valid_generator = _flow_subset(validGen, "validation")

In [None]:
import keras
from tensorflow.keras.optimizers import Adam, SGD
# Take Model from the same package (tensorflow.keras) as the backbone and the
# layers; mixing it with the standalone `keras` package breaks whenever the
# two installations are not the same implementation.
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout

# ImageNet-pretrained ResNet50 without its classifier; pooling="avg" makes the
# backbone output a flat feature vector per image.
baseNet = tf.keras.applications.ResNet50(weights = "imagenet", include_top = False, pooling = "avg")

# Classification head: two ReLU layers followed by one sigmoid unit per class,
# so every class gets its own independent score.
x = baseNet.output
x = Dense(1024, activation='relu')(x)
x = Dense(64, activation='relu')(x)
predictions = Dense(NUM_CLASSES, activation='sigmoid')(x)

# this is the model we will train
model = Model(inputs=baseNet.input, outputs=predictions)

# The backbone is left trainable. To train only the new head instead, set
# `layer.trainable = False` for every layer in `baseNet.layers`.

In [None]:
# Use the tensorflow.keras namespace throughout, matching the backbone that
# was created with tf.keras.applications.
from tensorflow.keras import backend as K

def recall(y_true, y_pred):
    """Batch recall: rounded true positives / rounded actual positives."""
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    # epsilon guards against division by zero when a batch has no positives
    return true_positives / (possible_positives + K.epsilon())

def precision(y_true, y_pred):
    """Batch precision: rounded true positives / rounded predicted positives."""
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())

def f1(y_true, y_pred):
    """Batch F1 score: harmonic mean of `precision` and `recall`."""
    # The locals must not be called `precision` / `recall`: assigning to those
    # names inside this function would shadow the metric functions and raise
    # UnboundLocalError on the very first call.
    precision_value = precision(y_true, y_pred)
    recall_value = recall(y_true, y_pred)
    return 2*((precision_value * recall_value) / (precision_value + recall_value + K.epsilon()))

earlystop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    # threshold to consider as no change
    min_delta=0.02,
    # stop after this many epochs with no change
    patience=2,
    verbose=1,
    mode='min',
    restore_best_weights= True
)

# `learning_rate` is the supported spelling (`lr` is a deprecated alias).
# NOTE(review): `decay` is only accepted by older / legacy Keras optimizers;
# confirm it against the installed TensorFlow version.
opt = tf.keras.optimizers.Adam(learning_rate=0.001, decay=1e-4)
model.compile(optimizer = opt, loss = "binary_crossentropy", metrics = ["accuracy", f1, recall, precision])

# Step counts come from the generators so each epoch visits every training and
# validation batch exactly once, whatever the dataset size. Callbacks are
# passed as a list, as Model.fit documents.
history = model.fit(train_generator,
                    steps_per_epoch = len(train_generator),
                    epochs = 10,
                    validation_data = valid_generator,
                    validation_steps = len(valid_generator),
                    verbose = 1,
                    callbacks = [earlystop])

In [None]:
#model.save("trainedModel.h5")

In [None]:
# Print each backbone layer's position followed by its name.
backbone_layer_names = [backbone_layer.name for backbone_layer in baseNet.layers]
for position in range(len(backbone_layer_names)):
    print(position, backbone_layer_names[position])

In [None]:
# Show the names of the metric series recorded by model.fit.
print(history.history.keys())

In [None]:
def _plot_history_metric(history, metric):
    """Plot one recorded metric for the training and validation runs.

    Parameters
    ----------
    history : object returned by ``model.fit``
        Its ``history`` dict must contain ``metric`` and ``'val_' + metric``.
    metric : str
        Key of the training series, e.g. ``'loss'``.
    """
    fig, ax = plt.subplots()
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title(f'model {metric}')
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'validation'], loc='upper right')
    plt.show()


# Keras names a custom metric after its function, so the recorded keys are
# 'f1', 'recall' and 'precision' (not 'f1_m', 'recall_m', 'precision_m').
for metric_name in ('accuracy', 'f1', 'recall', 'precision', 'loss'):
    _plot_history_metric(history, metric_name)

In [None]:
THRESHOLD = 0.5

from tqdm import tqdm
import PIL.Image

test_df = pd.read_csv('../input/plant-pathology-2021-fgvc8/sample_submission.csv')

# Write a resized copy of every test image into the working directory, keeping
# the original file name so the generator below can find it through test_df.
for img_name in tqdm(test_df['image']):
    path = '../input/plant-pathology-2021-fgvc8/test_images/' + str(img_name)
    with PIL.Image.open(path) as img:
        img.resize((imageSize, imageSize)).save(f'./{img_name}')

# Inference generator: reuse only the deterministic preprocessing of the
# training generator (no random shear/zoom/flip).
testGen = ImageDataGenerator(preprocessing_function = trainGen.preprocessing_function,
                             rescale = trainGen.rescale)

# class_mode=None yields images only (the placeholder labels in the sample
# submission are ignored) and shuffle=False keeps the prediction rows in the
# same order as test_df.
test_generator = testGen.flow_from_dataframe(
    dataframe = test_df,
    directory = './',
    x_col = "image",
    y_col = None,
    class_mode = None,
    target_size = (imageSize, imageSize),
    batch_size = batchSize,
    shuffle = False,
)

# One list of class indices per test image: every class scoring above
# THRESHOLD, or the single best-scoring class when none clears it.
predResult = []
prediction = model.predict(test_generator)
for scores in prediction:
    predictionLabels = [index for index, categoryScore in enumerate(scores)
                        if categoryScore > THRESHOLD]
    if not predictionLabels:
        # argmax over this image's scores only, wrapped in a list so every
        # entry of predResult has the same shape for the mapping step
        predictionLabels = [int(np.argmax(scores))]
    predResult.append(predictionLabels)

print(predResult)

In [None]:
# Invert the generator's label-name -> class-index mapping so predicted
# indices can be translated back into label names.
label_map = train_generator.class_indices
final_label_map = {class_index: label_name for label_name, class_index in label_map.items()}
print(final_label_map)

# For each test row, write the space-separated names of its predicted classes
# into the column at position 1 of test_df.
for row_position in range(len(test_df)):
    predicted_names = [final_label_map[class_index] for class_index in predResult[row_position]]
    test_df.iloc[row_position, 1] = " ".join(predicted_names).strip()

In [None]:
# Write the filled-in test frame to submission.csv without the index column.
test_df.to_csv('submission.csv', index=False)

In [None]:
# Preview the first rows of the frame that was written to submission.csv.
test_df.head()