In [1]:
import pandas as pd
import os
import cv2
from PIL import Image
import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.applications import Xception
from tensorflow.keras.optimizers import Adam
from keras.models import model_from_json

2.4.3


In [2]:
input_dir = "../input/cassava-leaf-disease-classification"
trainImages = os.path.join(input_dir,"trainImages")
testImages = os.path.join(input_dir,'testImages')

In [3]:
train = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')
train.head()

Unnamed: 0,image_id,label
0,1000015157.jpg,0
1,1000201771.jpg,3
2,100042118.jpg,1
3,1000723321.jpg,1
4,1000812911.jpg,3


In [4]:
image_list = train['image_id'].to_list()
label_list = train['label'].to_list()

In [5]:
BATCH_SIZE =8 #Mini-Batch Gradient Descent
STEPS_PER_EPOCH = len(train)*0.8 / BATCH_SIZE #0.8
VALIDATION_STEPS = len(train)*0.2 / BATCH_SIZE
EPOCHS = 20 #10
TARGET_SIZE = 350  #350
train.label = train.label.astype('str')


train_data_generator = ImageDataGenerator(
    rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
    zoom_range = 0.1, # Randomly zoom image 
    width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
    horizontal_flip=True,  # randomly flip images
    validation_split=0.2
)
validation_datagen = ImageDataGenerator(validation_split = 0.2)

In [6]:
train_generator = train_data_generator.flow_from_dataframe(train,
                         directory = os.path.join('../input/cassava-leaf-disease-classification/train_images'),
                         subset = "training",
                         x_col = "image_id",
                         y_col = "label",
                         target_size = (TARGET_SIZE, TARGET_SIZE),
                         batch_size = BATCH_SIZE,
                         class_mode = "sparse",
                         shuffle= True)


validation_generator = validation_datagen.flow_from_dataframe(train,
                         directory = os.path.join('../input/cassava-leaf-disease-classification/train_images'),
                         subset = "validation",
                         x_col = "image_id",
                         y_col = "label",
                         target_size = (TARGET_SIZE, TARGET_SIZE),
                         batch_size = BATCH_SIZE,
                         class_mode = "sparse")

Found 17118 validated image filenames belonging to 5 classes.
Found 4279 validated image filenames belonging to 5 classes.


In [7]:
def create_model():
    conv_base = Xception(include_top=False, input_tensor=None,weights="../input/inceptionresnetv2/xception_tf_notop.h5",
    pooling=None, input_shape=(TARGET_SIZE, TARGET_SIZE, 3), classifier_activation='softmax')
                               
    model = conv_base.output
    model = layers.GlobalAveragePooling2D()(model)
    model = layers.Dense(5, activation = "softmax")(model)
    model = models.Model(conv_base.input, model)

    model.compile(optimizer = Adam(lr = 0.0001),
                  loss = "sparse_categorical_crossentropy",
                  metrics = ["acc"])
    return model

In [8]:
model = create_model()

In [9]:
model_save = ModelCheckpoint('submission.csv', 
                              save_best_only = True, 
                              save_weights_only = True,
                              monitor = 'val_loss', 
                              mode = 'min', verbose = 1)


In [10]:
early_stop = EarlyStopping(monitor = 'val_loss', min_delta = 0.001, 
                            patience = 5, mode = 'min', verbose = 1,
                            restore_best_weights = True)
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.3, 
                               patience = 2, min_delta = 0.001, 
                               mode = 'min', verbose = 1) #reduced learning rate
history = model.fit(
     train_generator,
     steps_per_epoch = STEPS_PER_EPOCH,
     epochs = EPOCHS,
     validation_data = validation_generator,
     validation_steps = VALIDATION_STEPS,
     callbacks = [model_save, early_stop, reduce_lr])


Epoch 1/20
Epoch 00001: val_loss improved from inf to 0.47915, saving model to submission.csv
Epoch 2/20
Epoch 00002: val_loss improved from 0.47915 to 0.47438, saving model to submission.csv
Epoch 3/20
Epoch 00003: val_loss did not improve from 0.47438
Epoch 4/20
Epoch 00004: val_loss did not improve from 0.47438

Epoch 00004: ReduceLROnPlateau reducing learning rate to 2.9999999242136255e-05.
Epoch 5/20
Epoch 00005: val_loss did not improve from 0.47438
Epoch 6/20
Epoch 00006: val_loss did not improve from 0.47438

Epoch 00006: ReduceLROnPlateau reducing learning rate to 8.999999772640877e-06.
Epoch 7/20
Epoch 00007: val_loss did not improve from 0.47438
Restoring model weights from the end of the best epoch.
Epoch 00007: early stopping


In [11]:
model.save('./Xception_best_weights.h5')
model = keras.models.load_model('./Xception_best_weights.h5')
submission_file = pd.read_csv(os.path.join('../input/cassava-leaf-disease-classification/sample_submission.csv'))
submission_file

Unnamed: 0,image_id,label
0,2216849948.jpg,4


In [12]:
import numpy as np
predictions = []
for image_id in submission_file.image_id:
    image = Image.open(os.path.join(f'../input/cassava-leaf-disease-classification/test_images/{image_id}'))
    image = image.resize((TARGET_SIZE, TARGET_SIZE))
    image = np.expand_dims(image, axis = 0)
    predictions.append(np.argmax(model.predict(image)))

submission_file['label'] = predictions
submission_file

Unnamed: 0,image_id,label
0,2216849948.jpg,4


In [13]:
submission_file.to_csv('submission.csv', index = False)