## Imports

In [None]:
#Importing necessary modules
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
import shutil
import math
import pickle

In [None]:
#Importing modules for model implementation and training
import tensorflow_addons as tfa
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.densenet import DenseNet169, preprocess_input
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

## The training images were resized to 224x224 before training


## Paths

In [None]:
# Input locations, relative to the notebook's working directory.
resized_imgs_path = './resized_images'  # directory the image generators read from
train_csv_path = './train.csv'          # annotations table loaded into `df`

## Reading 'train.csv'

In [None]:
def split_labels(label_string):
    """Split a space-separated label string into a list of label names."""
    return label_string.split(" ")


# Load the annotations and turn each "labels" cell into a list of label names,
# which is the form the multi-label tooling in the following cells consumes.
df = pd.read_csv(train_csv_path)
df["labels"] = df["labels"].map(split_labels)
df.head()

## Encoding labels

In [None]:
# One-hot (multi-label indicator) encoding of the label lists:
# one column per distinct label, 1 where the row carries that label.
mlb = MultiLabelBinarizer()
onehot_labels = mlb.fit_transform(df["labels"])
print(mlb.classes_)

# Same matrix as a DataFrame: rows aligned with `df`, columns named after the labels
df_labels = pd.DataFrame(
    data=onehot_labels,
    index=df.index,
    columns=mlb.classes_,
)
df_labels

## Data augmentation and training batches

In [None]:
# Spatial size fed to the network. Kept as a list because the model cell
# appends the channel count with `IMG_SIZE + [3]`.
IMG_SIZE = [224, 224]

# Data augmentation: light geometric jitter plus horizontal flips.
# `validation_split` reserves 20% of the dataframe rows for the 'validation' subset,
# and `preprocess_input` applies the DenseNet input normalisation to every image.
augmentation_options = dict(
    rotation_range=5,
    zoom_range=0.1,
    shear_range=0.05,
    horizontal_flip=True,
    validation_split=0.2,
    preprocessing_function=preprocess_input,
)
generator = ImageDataGenerator(**augmentation_options)

In [None]:
# Generating training and validation batches

# Arguments shared by both iterators.
# `classes` is given explicitly so the column order of the generated label
# vectors is exactly `mlb.classes_`, the order the submission cell uses to map
# prediction indices back to label names.
flow_options = dict(
    dataframe=df,
    directory=resized_imgs_path,
    x_col='image',
    y_col='labels',
    classes=list(mlb.classes_),
    target_size=IMG_SIZE,
    batch_size=8,
    color_mode='rgb',
    class_mode='categorical')

# Training batches: augmented by `generator` and reshuffled every epoch.
train_generator = generator.flow_from_dataframe(
    subset='training',
    shuffle=True,
    seed=40,
    **flow_options)

# Validation batches must NOT be augmented or shuffled: EarlyStopping and
# ReduceLROnPlateau act on the validation metrics, so these have to be measured
# on the same, undistorted images every epoch.
# The subset is cut from the dataframe rows by `validation_split`, so a second
# generator with the same split value (keep it in sync with `generator`) selects
# the same validation rows, only without the random transforms.
validation_datagen = ImageDataGenerator(
    validation_split=0.2,
    preprocessing_function=preprocess_input)

test_generator = validation_datagen.flow_from_dataframe(
    subset='validation',
    shuffle=False,
    **flow_options)

## Training
- DenseNet169 with fine-tuning
- ReduceLROnPlateau
- EarlyStopping
- Optimizer: SGD with initial learning rate 0.0001 and momentum 0.9
- Metrics: Accuracy and F1 score

In [None]:
# Number of output units, taken from the training iterator instead of being
# hard-coded, so the head and the metric always agree with the data.
NUM_CLASSES = len(train_generator.class_indices)

# Download pre-trained ImageNet weights (convolutional base only, no classifier head)
base_model = DenseNet169(include_top=False, weights='imagenet', input_shape=IMG_SIZE + [3])

# Adding top layers: pooled features -> two dense layers -> one sigmoid unit per label
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(64, activation='relu')(x)
prediction = Dense(NUM_CLASSES, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=prediction)

# Metric used to evaluate the model.
# `threshold` is required for a multi-label (sigmoid) output: without it the
# metric only scores the arg-max class, i.e. it treats the task as single-label.
f1 = tfa.metrics.F1Score(num_classes=NUM_CLASSES, average='macro', threshold=0.5)

# Early stopping - stop training if 'val_f1_score' does not improve for 5 epochs
# and roll back to the best weights seen.
es = EarlyStopping(
    patience=5,
    monitor='val_f1_score',
    mode='max',
    restore_best_weights=True)

# ReduceLROnPlateau - divide the learning rate by 10 if 'val_loss' does not
# improve by at least `min_delta` for 3 epochs.
lr = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.1,
    patience=3,
    verbose=1,
    mode="min",
    min_delta=0.01
)

# Compiling model: binary cross-entropy treats every label as an independent
# yes/no decision, matching the sigmoid outputs.
# NOTE(review): with a multi-unit output Keras may resolve the 'accuracy' string
# to categorical (arg-max) accuracy rather than per-label accuracy - confirm
# before relying on that number.
model.compile(loss='binary_crossentropy',
              optimizer=SGD(learning_rate=0.0001, momentum=0.9),
              metrics=['accuracy', f1])

# Train
history = model.fit(
    x=train_generator,
    validation_data=test_generator,
    epochs=50,
    verbose=1,
    callbacks=[lr, es])

### My output

In [None]:
"""
Epoch 1/50
1864/1864 [==============================] - 364s 188ms/step - loss: 0.5194 - accuracy: 0.2956 - f1_score: 0.1968 - val_loss: 0.3470 - val_accuracy: 0.5762 - val_f1_score: 0.4155
Epoch 2/50
1864/1864 [==============================] - 352s 189ms/step - loss: 0.3407 - accuracy: 0.5807 - f1_score: 0.4407 - val_loss: 0.2411 - val_accuracy: 0.7335 - val_f1_score: 0.6551
Epoch 3/50
1864/1864 [==============================] - 343s 184ms/step - loss: 0.2664 - accuracy: 0.6954 - f1_score: 0.6180 - val_loss: 0.1828 - val_accuracy: 0.8089 - val_f1_score: 0.7650
Epoch 4/50
1864/1864 [==============================] - 349s 187ms/step - loss: 0.2252 - accuracy: 0.7462 - f1_score: 0.6959 - val_loss: 0.1561 - val_accuracy: 0.8336 - val_f1_score: 0.8020
Epoch 5/50
1864/1864 [==============================] - 355s 190ms/step - loss: 0.2018 - accuracy: 0.7816 - f1_score: 0.7349 - val_loss: 0.1379 - val_accuracy: 0.8588 - val_f1_score: 0.8308
Epoch 6/50
1864/1864 [==============================] - 357s 191ms/step - loss: 0.1863 - accuracy: 0.8009 - f1_score: 0.7606 - val_loss: 0.1305 - val_accuracy: 0.8634 - val_f1_score: 0.8364
Epoch 7/50
1864/1864 [==============================] - 356s 191ms/step - loss: 0.1697 - accuracy: 0.8209 - f1_score: 0.7800 - val_loss: 0.1208 - val_accuracy: 0.8763 - val_f1_score: 0.8545
Epoch 8/50
1864/1864 [==============================] - 356s 191ms/step - loss: 0.1673 - accuracy: 0.8214 - f1_score: 0.7886 - val_loss: 0.1162 - val_accuracy: 0.8792 - val_f1_score: 0.8567
Epoch 9/50
1864/1864 [==============================] - 363s 195ms/step - loss: 0.1611 - accuracy: 0.8293 - f1_score: 0.7971 - val_loss: 0.1119 - val_accuracy: 0.8790 - val_f1_score: 0.8541
Epoch 10/50
1864/1864 [==============================] - 368s 198ms/step - loss: 0.1533 - accuracy: 0.8427 - f1_score: 0.8093 - val_loss: 0.1102 - val_accuracy: 0.8870 - val_f1_score: 0.8613
Epoch 11/50
1864/1864 [==============================] - 370s 199ms/step - loss: 0.1505 - accuracy: 0.8441 - f1_score: 0.8104 - val_loss: 0.1081 - val_accuracy: 0.8929 - val_f1_score: 0.8718
Epoch 12/50
1864/1864 [==============================] - 361s 194ms/step - loss: 0.1440 - accuracy: 0.8479 - f1_score: 0.8106 - val_loss: 0.1040 - val_accuracy: 0.8937 - val_f1_score: 0.8708
Epoch 13/50
1864/1864 [==============================] - 367s 197ms/step - loss: 0.1355 - accuracy: 0.8605 - f1_score: 0.8282 - val_loss: 0.1032 - val_accuracy: 0.8956 - val_f1_score: 0.8730

Epoch 00013: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-06.
Epoch 14/50
1864/1864 [==============================] - 368s 198ms/step - loss: 0.1319 - accuracy: 0.8677 - f1_score: 0.8375 - val_loss: 0.1011 - val_accuracy: 0.8935 - val_f1_score: 0.8706
Epoch 15/50
1864/1864 [==============================] - 363s 195ms/step - loss: 0.1345 - accuracy: 0.8655 - f1_score: 0.8337 - val_loss: 0.1016 - val_accuracy: 0.8943 - val_f1_score: 0.8746
Epoch 16/50
1864/1864 [==============================] - 360s 193ms/step - loss: 0.1320 - accuracy: 0.8634 - f1_score: 0.8333 - val_loss: 0.1020 - val_accuracy: 0.8929 - val_f1_score: 0.8695

Epoch 00016: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-07.
Epoch 17/50
1864/1864 [==============================] - 356s 191ms/step - loss: 0.1308 - accuracy: 0.8698 - f1_score: 0.8399 - val_loss: 0.1025 - val_accuracy: 0.8921 - val_f1_score: 0.8706
Epoch 18/50
1864/1864 [==============================] - 367s 197ms/step - loss: 0.1357 - accuracy: 0.8587 - f1_score: 0.8277 - val_loss: 0.1011 - val_accuracy: 0.8972 - val_f1_score: 0.8747
Epoch 19/50
1864/1864 [==============================] - 366s 196ms/step - loss: 0.1363 - accuracy: 0.8606 - f1_score: 0.8314 - val_loss: 0.1002 - val_accuracy: 0.8951 - val_f1_score: 0.8750
Epoch 20/50
1864/1864 [==============================] - 370s 198ms/step - loss: 0.1309 - accuracy: 0.8667 - f1_score: 0.8349 - val_loss: 0.0991 - val_accuracy: 0.8980 - val_f1_score: 0.8785
Epoch 21/50
1864/1864 [==============================] - 359s 192ms/step - loss: 0.1355 - accuracy: 0.8602 - f1_score: 0.8337 - val_loss: 0.1024 - val_accuracy: 0.8951 - val_f1_score: 0.8729
Epoch 22/50
1864/1864 [==============================] - 367s 197ms/step - loss: 0.1285 - accuracy: 0.8716 - f1_score: 0.8398 - val_loss: 0.1006 - val_accuracy: 0.8951 - val_f1_score: 0.8715

Epoch 00022: ReduceLROnPlateau reducing learning rate to 9.999999974752428e-08.
Epoch 23/50
1864/1864 [==============================] - 365s 196ms/step - loss: 0.1320 - accuracy: 0.8618 - f1_score: 0.8287 - val_loss: 0.1013 - val_accuracy: 0.8945 - val_f1_score: 0.8731
Epoch 24/50
1864/1864 [==============================] - 368s 197ms/step - loss: 0.1317 - accuracy: 0.8650 - f1_score: 0.8345 - val_loss: 0.1005 - val_accuracy: 0.8964 - val_f1_score: 0.8769
Epoch 25/50
1864/1864 [==============================] - 368s 197ms/step - loss: 0.1302 - accuracy: 0.8684 - f1_score: 0.8359 - val_loss: 0.1009 - val_accuracy: 0.8959 - val_f1_score: 0.8734

Epoch 00025: ReduceLROnPlateau reducing learning rate to 1.0000000116860975e-08.
"""

## Saving model and training history

In [None]:
# Keep the per-epoch metrics (a plain dict) so the curves can be re-plotted
# later without retraining.
with open('densenet169_history', 'wb') as history_file:
    pickle.dump(history.history, history_file)

# Persist the trained network to a single HDF5 file
model.save("densenet169_100ft.h5")

## Training results
- ReduceLROnPlateau did not improve results

In [None]:
# Plotting f1-score history
plt.plot(history.history['f1_score'])
plt.plot(history.history['val_f1_score'])
plt.title('model f1-score')
plt.ylabel('f1-score')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
# Plotting loss history
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
# Plotting learning rate history
plt.plot(history.history['lr'])
plt.title('model learning rate')
plt.ylabel('lr')
plt.xlabel('epoch')
plt.yscale("log")
plt.show()

## Predictions with test images

In [None]:
# Load model
model = load_model("../input/vgg16-imagenet-pre-treined-model/densenet169_100ft.h5")

test_imgs_path = '../input/plant-pathology-2021-fgvc8/test_images'

# Probability above which a label is reported
threshold = 0.4
# Index of the label that is never reported together with other labels.
# NOTE(review): presumably 'healthy' in `mlb.classes_` order - confirm.
EXCLUSIVE_CLASS_INDEX = 2
# Images sent to the model per predict call (bounds memory use)
PREDICT_BATCH_SIZE = 32


def load_test_image(path, size=(224, 224)):
    """Read one image and return it as a model-ready float array.

    The image is resized to `size`, converted from OpenCV's BGR channel order
    to RGB and normalised with the DenseNet `preprocess_input`.

    Raises:
        ValueError: if OpenCV cannot read/decode the file.
    """
    bgr = cv2.imread(path)
    if bgr is None:
        # cv2.imread signals failure by returning None instead of raising
        raise ValueError(f"Could not read image: {path}")
    resized = cv2.resize(bgr, size)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    return preprocess_input(rgb)


def postprocess_predictions(probs, threshold, exclusive_index):
    """Turn per-class probabilities into 0/1 label indicators.

    Rules applied to every row (to avoid weird classifications):
      * the most probable class is always reported, even if it is below
        `threshold`, so no row ends up without a label;
      * if the most probable class is `exclusive_index`, it is the only label;
      * otherwise `exclusive_index` is never reported, and every other class
        above `threshold` is.

    Args:
        probs: array-like of shape (n_samples, n_classes).
        threshold: probability above which a class is reported.
        exclusive_index: column index of the mutually exclusive class.

    Returns:
        Integer array of shape (n_samples, n_classes) holding 0/1 flags.
    """
    probs = np.asarray(probs, dtype=float)
    flags = probs > threshold
    if len(flags) == 0:
        return flags.astype(int)

    top_class = probs.argmax(axis=1)
    row_ids = np.arange(len(probs))
    flags[row_ids, top_class] = True

    exclusive_rows = top_class == exclusive_index
    flags[exclusive_rows] = False
    flags[exclusive_rows, exclusive_index] = True
    flags[~exclusive_rows, exclusive_index] = False
    return flags.astype(int)


# Sorted so the row order of the submission is reproducible
filenames = sorted(os.listdir(test_imgs_path))

# Predict in fixed-size batches instead of one call (and one vstack) per image
batch_outputs = []
for start in range(0, len(filenames), PREDICT_BATCH_SIZE):
    batch_names = filenames[start:start + PREDICT_BATCH_SIZE]
    batch = np.stack([
        load_test_image(os.path.join(test_imgs_path, name))
        for name in batch_names
    ])
    batch_outputs.append(model.predict(batch))

if batch_outputs:
    raw_preds = np.vstack(batch_outputs)
else:
    # Empty test directory: keep a well-formed (0, n_classes) array
    raw_preds = np.empty((0, model.output_shape[-1]))

# Applying post-processing rules and threshold
preds = postprocess_predictions(raw_preds, threshold, EXCLUSIVE_CLASS_INDEX)

## Generate submit file

In [None]:
# Map every 0/1 prediction row back to its label names, joined by single
# spaces (the same format as the "labels" column of the training CSV).
labels = mlb.classes_
predictions = [
    " ".join(labels[position] for position, flag in enumerate(row) if flag)
    for row in preds
]

# One row per test image: file name and its predicted label string
results = pd.DataFrame({"image": filenames, "labels": predictions})
results.to_csv("./submission.csv", index=False)
results