In [None]:
import pandas as pd
import numpy as np

# Load the competition metadata. The image ids get a ".jpg" suffix so they can
# be joined directly onto the image directory path further down.
train = (
    pd.read_csv('/kaggle/input/plant-pathology-2020-fgvc7/train.csv')
    .assign(image_id=lambda frame: frame['image_id'] + '.jpg')
)
test = (
    pd.read_csv('/kaggle/input/plant-pathology-2020-fgvc7/test.csv')
    .assign(image_id=lambda frame: frame['image_id'] + '.jpg')
)

In [None]:
from matplotlib import pyplot as plt
from matplotlib import image as mpimg

IMG_PATH = '/kaggle/input/plant-pathology-2020-fgvc7/images/'

# Display the first five training images, one figure each, titled with the
# label of the row. The label columns are checked in a fixed order and the
# first one equal to 1 wins; 'scab' is the fallback when none of them matches.
for idx in range(5):
    sample = train.iloc[idx]
    plt.imshow(mpimg.imread(IMG_PATH + sample['image_id']))
    label = next(
        (name for name in ('healthy', 'multiple_diseases', 'rust')
         if sample[name] == 1),
        'scab',
    )
    plt.title(label)
    plt.show()


# EfficientNet model

Now we'll train a model using EfficientNet transfer learning.

In [None]:
!pip install tensorflow-addons

In [None]:
!pip install efficientnet

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows for validation. The fixed random_state
# makes the split identical on every run.
training, validation = train_test_split(train, test_size=0.2, random_state=42)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Shared settings: square input resolution, batch size and label columns.
SIZE, BATCH = 224, 32
TARGETS = ['healthy', 'multiple_diseases', 'rust', 'scab']

# Augmenting image generator: multiplies pixel values by 1/255 and applies
# random rotations, shifts, zooms, brightness changes and flips.
image_gen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    brightness_range=[0.5, 1.5],
    horizontal_flip=True,
    vertical_flip=True,
)

# flow_from_dataframe generators
# Training batches come from the `training` split only. Feeding the full
# `train` frame here would also train on the rows held out in `validation`,
# making every validation metric (and the checkpoint chosen from it)
# optimistic. Batches are shuffled each epoch so the model does not see the
# images in fixed CSV order; the seed keeps the shuffling reproducible.
train_generator = image_gen\
    .flow_from_dataframe(training,
                        directory=IMG_PATH,
                        target_size=(SIZE, SIZE),
                        x_col="image_id",
                        y_col=TARGETS,
                        class_mode='raw',
                        shuffle=True,
                        seed=42,
                        batch_size=BATCH)

# Validation images are only rescaled, never randomly augmented: val_loss
# drives both the checkpoint and the learning-rate schedule, so it has to be
# measured on the same, unmodified images every epoch.
validation_generator = ImageDataGenerator(rescale=1./255)\
    .flow_from_dataframe(validation,
                        directory=IMG_PATH,
                        target_size=(SIZE, SIZE),
                        x_col="image_id",
                        y_col=TARGETS,
                        class_mode='raw',
                        shuffle=False,
                        batch_size=BATCH)

# Test images are only rescaled: random rotations/flips/brightness changes at
# inference time would make the predictions non-deterministic. shuffle=False
# keeps the prediction rows in the same order as the rows of `test`.
test_generator = ImageDataGenerator(rescale=1./255)\
    .flow_from_dataframe(test,
                        directory=IMG_PATH,
                        target_size=(SIZE, SIZE),
                        x_col="image_id",
                        y_col=None,
                        class_mode=None,
                        shuffle=False,
                        batch_size=BATCH)

In [None]:
import efficientnet.keras as efn 
import tensorflow_addons as tfa
from tensorflow.keras.callbacks import Callback
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from keras.optimizers import Adadelta

# Training callbacks, both driven by the validation loss.

## Multiply the learning rate by 0.3 after 3 epochs without improvement,
## never going below 1e-6
rlr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.3,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

## Write weights to disk only when the validation loss reaches a new minimum
mc = ModelCheckpoint(
    'model.hdf5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=0,
)

# Network definition
## Headless EfficientNetB4 backbone initialised with ImageNet weights
base_model = efn.EfficientNetB4(
    weights='imagenet',
    include_top=False,
    input_shape=(SIZE, SIZE, 3),
)

## Classification head: global pooling, two ReLU dense layers, then a
## 4-unit softmax output
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(128, activation="relu")(pooled)
hidden = Dense(64, activation="relu")(hidden)
predictions = Dense(4, activation="softmax")(hidden)

## Join backbone and head into the model that gets compiled and trained
model = Model(inputs=base_model.input, outputs=predictions)

# Adam optimizer with the sigmoid focal cross-entropy loss from
# tensorflow-addons; accuracy is the only tracked metric.
focal_loss = tfa.losses.SigmoidFocalCrossEntropy()
model.compile(loss=focal_loss, optimizer='adam', metrics=['accuracy'])

# Train for 7 epochs. len(generator) is the integer number of batches needed
# to cover the data once (ceil(n / batch_size)); the previous n / BATCH
# expression produced a float step count.
model_history = model.fit(train_generator,
                            validation_data=validation_generator,
                            steps_per_epoch=len(train_generator),
                            validation_steps=len(validation_generator),
                            epochs=7,
                            verbose=1,
                            callbacks = [rlr, mc])

In [None]:
# Restore the weights stored in the checkpoint file written during training
model.load_weights(filepath="model.hdf5")

In [None]:
# Plot training/validation curves from the fit history
acc = model_history.history['accuracy']
val_acc = model_history.history['val_accuracy']
loss = model_history.history['loss']
val_loss = model_history.history['val_loss']

epochs = range(len(acc))

# Accuracy on the left, loss on the right. The loss series were previously
# extracted but never drawn, and a trailing plt.figure() left an empty figure.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

ax_acc.plot(epochs, acc, 'r', label='Training accuracy')
ax_acc.plot(epochs, val_acc, 'b', label='Validation accuracy')
ax_acc.set(title='Training and validation accuracy',
           xlabel='Epoch', ylabel='Accuracy')
ax_acc.legend()

ax_loss.plot(epochs, loss, 'r', label='Training loss')
ax_loss.plot(epochs, val_loss, 'b', label='Validation loss')
ax_loss.set(title='Training and validation loss',
            xlabel='Epoch', ylabel='Loss')
ax_loss.legend()

plt.show()

# Make predictions

In [None]:
# Make predictions
# len(test_generator) is the integer number of batches that covers every test
# image exactly once; n / BATCH produced a float step count.
preds = model.predict(test_generator, steps=len(test_generator))

# Prepare submission

In [None]:
# Build the submission file: ids come from the sample submission, and each
# label column is filled from the matching column of `preds`.
sample_sub = pd.read_csv('/kaggle/input/plant-pathology-2020-fgvc7/sample_submission.csv')

submission = pd.DataFrame({'image_id': sample_sub['image_id']})
for position, label in enumerate(['healthy', 'multiple_diseases', 'rust', 'scab']):
    submission[label] = preds[:, position]

submission.to_csv("submission.csv", index=False)
submission.head()