In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Root folder of the competition dataset; the image folders live beneath it.
_dataset_root = '../input/cassava-leaf-disease-classification/'
train_dir = _dataset_root + 'train_images/'
test_dir = _dataset_root + 'test_images/'


In [None]:
# Training metadata (the `image_id` and `label` columns are used below).
train_csv_path = '../input/cassava-leaf-disease-classification/train.csv'
train_df = pd.read_csv(train_csv_path)

# Label-number -> disease-name mapping; orient='index' makes each JSON key a row.
label_map_path = '../input/cassava-leaf-disease-classification/label_num_to_disease_map.json'
label_js = pd.read_json(label_map_path, orient='index')


In [None]:
# Full path of every training image: folder prefix + file name from `image_id`.
train_df['image'] = train_dir + train_df['image_id']

Disease sample photos

In [None]:
from keras.preprocessing import image

# 5x5 grid: row r shows five randomly drawn training images whose label is r.
fig, axes = plt.subplots(nrows=5, ncols=5, figsize=(20,20))

# Hide every tick and tick label; only the row captions remain visible.
hidden_ticks = dict(axis='both', which='both',
                    bottom=False, top=False,
                    labelbottom=False, right=False,
                    left=False, labelleft=False)

for class_id, row_axes in enumerate(axes):
    class_images = train_df[train_df.label==class_id].image
    for cell_ax in row_axes:
        sample_path = class_images.sample().values[0]
        pixels = image.img_to_array(image.load_img(sample_path))
        # imshow expects float RGB data in [0, 1], hence the rescale.
        cell_ax.imshow(pixels/255., aspect='auto')
        cell_ax.tick_params(**hidden_ticks)

# Caption each row on its left-most axis with the matching entry of the
# label map (presumably the disease name for that label number).
for first_ax, row_caption in zip(axes[:,0], label_js[0]):
    first_ax.set_ylabel(row_caption, rotation=90, size='large')

plt.subplots_adjust(wspace=.05, hspace=.05)
plt.show()


Splitting dataset and encoding categorical target

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# Fixed seed so that Restart & Run All reproduces the same train/validation split.
SPLIT_SEED = 42

# Hold out 20% of the images for validation. Stratifying on the label keeps
# the class proportions the same in both subsets.
X_train, X_val, y_train, y_val = train_test_split(
    train_df['image'], train_df['label'],
    test_size=0.2,
    random_state=SPLIT_SEED,
    stratify=train_df['label'])

# One-hot encode the integer labels. The binarizer is fitted on the full label
# column so both subsets share the same class -> column mapping.
lb = LabelBinarizer()
lb.fit(train_df.label)

y_train_lb = lb.transform(y_train)
y_val_lb = lb.transform(y_val)

# Drop the shuffled original index so paths and one-hot rows align
# positionally when concatenated below.
X_train_df = pd.DataFrame(X_train).reset_index(drop=True)
y_train_df = pd.DataFrame(y_train_lb).add_prefix('label_')

X_val_df = pd.DataFrame(X_val).reset_index(drop=True)
y_val_df = pd.DataFrame(y_val_lb).add_prefix('label_')

# Final frames: one `image` path column followed by the `label_<k>` columns.
train = pd.concat([X_train_df, y_train_df], axis=1)
validation = pd.concat([X_val_df, y_val_df], axis=1)



Function for creating training and validation image generators

In [None]:
from keras.preprocessing.image import ImageDataGenerator

def create_image_generators(preprocess_input, target_image_size, batch_size=32):
    """Build the training and validation image generators.

    Both generators read from the module-level ``train`` and ``validation``
    dataframes, taking file paths from the ``image`` column and targets from
    the ``label_0`` .. ``label_4`` columns (``class_mode='raw'``).

    Parameters
    ----------
    preprocess_input : callable
        Passed to ``ImageDataGenerator`` as ``preprocessing_function``.
    target_image_size : tuple
        Passed to ``flow_from_dataframe`` as ``target_size``.
    batch_size : int, default 32
        Number of images per batch for both generators.

    Returns
    -------
    tuple
        ``(train_generator, validation_generator)``. The training generator
        shuffles its samples; the validation generator does not.
    """
    label_columns = [f'label_{x}' for x in np.arange(5)]

    def _flow(dataframe, shuffle):
        # One generator per split, differing only in data source and shuffling.
        datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
        return datagen.flow_from_dataframe(
            dataframe,
            x_col='image',
            y_col=label_columns,
            target_size=target_image_size,
            batch_size=batch_size,
            shuffle=shuffle,
            class_mode='raw')

    train_generator = _flow(train, shuffle=True)
    validation_generator = _flow(validation, shuffle=False)
    return train_generator, validation_generator

# ResNet50 model

Image generators for training and validation

In [None]:
from keras.applications.resnet50 import preprocess_input as preprocess_input_rn
# Generators that apply the ResNet50 preprocessing and resize images to 224x224.
rn50_generators = create_image_generators(preprocess_input_rn, (224,224))
train_generator_rn50, validation_generator_rn50 = rn50_generators

Loading the base model with ImageNet pre-trained weights

In [None]:
from keras.applications.resnet50 import ResNet50 
# ResNet50 without its classification top (include_top=False),
# initialised with the ImageNet weights.
base_model_rn50 = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224,224, 3),
)


Adding layers for classification

In [None]:
from keras.layers import Flatten, Dense, GlobalAveragePooling2D, BatchNormalization, Activation, Dropout
from keras.models import Model, Sequential

dropout_dense_layer = 0.3

# ResNet50 base followed by a small classification head:
# global average pooling -> two Dense/BatchNorm/ReLU stages -> dropout -> 5-way softmax.
model_rn50 = Sequential([
    base_model_rn50,
    GlobalAveragePooling2D(),
    Dense(128),
    BatchNormalization(),
    Activation('relu'),
    Dense(32),
    BatchNormalization(),
    Activation('relu'),
    Dropout(dropout_dense_layer),
    Dense(5, activation='softmax'),
])

model_rn50.summary()

In [None]:
# Adam optimiser with categorical cross-entropy, the loss that matches the
# one-hot targets and the softmax output; accuracy is tracked as the metric.
compile_options = dict(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])
model_rn50.compile(**compile_options)

In [None]:
# Training hyper-parameters.
BATCH_SIZE = 32  # images per training step
EPOCHS = 5       # passes over the training data


In [None]:
# Train the model. `fit` accepts generators directly (`fit_generator` is
# deprecated in TensorFlow 2.x Keras). `len(train_generator_rn50)` is the
# number of batches per epoch including the final partial batch, so no
# training images are skipped the way floor division by the batch size would.
history = model_rn50.fit(
    train_generator_rn50,
    validation_data=validation_generator_rn50,
    steps_per_epoch=len(train_generator_rn50),
    epochs=EPOCHS)

Predict labels

In [None]:
import os

# Every file in the test folder is treated as an image to classify.
image_name_list = os.listdir(test_dir)

list_rn50 = []        # raw model output for each image, in `image_name_list` order
label_list_rn50 = []  # index of the highest score for each image

for image_name in image_name_list:
    test_img = image.load_img(f'{test_dir}{image_name}', target_size=(224, 224))
    # Add a leading batch axis, then apply the ResNet50 preprocessing.
    batch_of_one = np.expand_dims(test_img.copy(), axis=0)
    class_scores = model_rn50.predict(preprocess_input_rn(batch_of_one))
    list_rn50.append(class_scores)
    label_list_rn50.append(np.argmax(class_scores))

In [None]:
# Show the predicted class index for every test image, in the same order as
# `image_name_list`.
label_list_rn50