# Introduction

This notebook builds a baseline convolutional neural network (CNN) for the Cassava Leaf Disease Classification competition.

![](https://ak.picdn.net/shutterstock/videos/6440165/thumb/1.jpg)



# Set up environment

In [None]:
# Imports
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers 
import tensorflow.keras.layers.experimental.preprocessing as preprocessing
import seaborn as sns
import os, cv2, json, warnings
warnings.simplefilter("ignore")
import matplotlib.pyplot as plt
from matplotlib import gridspec
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras import optimizers
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dropout, Flatten, Dense, Activation
from tensorflow.python.keras.layers import  Convolution2D, MaxPooling2D
from tensorflow.python.keras import backend as K
def display_training_curves(training, validation, title, subplot):
    """Plot a training curve and a validation curve in one subplot.

    Parameters
    ----------
    training : sequence of numbers
        Metric values for the training data, plotted along the epoch axis.
    validation : sequence of numbers
        Values of the same metric for the validation data.
    title : str
        Metric name; used as the y-axis label and, prefixed with
        'model ', as the subplot title.
    subplot : int
        Three-digit matplotlib subplot code (rows, columns, index), e.g. 211.
        A code ending in 1 is treated as the first call and starts a new
        figure.
    """
    if subplot % 10 == 1:  # first subplot: start a new figure
        # Create an empty figure only. plt.subplots() would also add a
        # full-size Axes that stays behind the subplots on Matplotlib
        # versions that no longer auto-remove overlapping Axes.
        plt.figure(figsize=(10, 10), facecolor='#F0F0F0')
    ax = plt.subplot(subplot)
    ax.set_facecolor('#F8F8F8')
    ax.plot(training)
    ax.plot(validation)
    ax.set_title('model ' + title)
    ax.set_ylabel(title)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'valid.'])
    # Adjust the layout once the Axes exists; on an empty figure there is
    # nothing to arrange.
    plt.tight_layout()
# Clear the Keras backend session before anything is built in this kernel.
K.clear_session()
# Directory the notebook reads the competition files from
# (train_images/, train.csv, label_num_to_disease_map.json).
WORK_DIR = '../input/cassava-leaf-disease-classification'
# Last expression of the cell: the directory listing is shown as its output.
os.listdir(WORK_DIR)

In [None]:
# Count the files in the training image folder.
train_image_dir = os.path.join(WORK_DIR, "train_images")
n_train_images = len(os.listdir(train_image_dir))
print('Train images: %d' % n_train_images)

In [None]:
# Load the training table (it provides the image_id and label columns used below).
train_csv_path = os.path.join(WORK_DIR, "train.csv")
train_labels = pd.read_csv(train_csv_path)
train_labels

In [None]:
# Pretty-print the label-number -> disease-name mapping shipped with the data.
label_map_path = os.path.join(WORK_DIR, "label_num_to_disease_map.json")
with open(label_map_path) as label_map_file:
    label_map = json.load(label_map_file)
print(json.dumps(label_map, indent=4))

![](https://neurohive.io/wp-content/uploads/2019/08/Screenshot-from-2019-08-26-23-19-25.png)

# Set up variables

In [None]:
# Parameters
BATCH_SIZE = 32     # images per batch produced by the data generators
EPOCHS = 100        # number of epochs passed to the training call
TARGET_SIZE = 512   # images are resized to TARGET_SIZE x TARGET_SIZE pixels
STEPS = 133         # training batches drawn per epoch
VALID_STEPS = 30    # validation batches drawn per epoch
LR = 4e-4           # learning rate handed to the Adam optimizer

# Reproducibility
def set_seed(seed=27):
    """Seed the random number generators used by this notebook.

    Seeds Python's built-in ``random`` module, NumPy and TensorFlow, and
    sets the ``PYTHONHASHSEED`` and ``TF_DETERMINISTIC_OPS`` environment
    variables.

    Parameters
    ----------
    seed : int, default 27
        Value used for every generator.
    """
    # Local import: `random` is not among the notebook's top-level imports.
    import random

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # NOTE: the hash seed of the running interpreter is fixed at start-up;
    # this setting only reaches processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Ask TensorFlow for deterministic op implementations.
    os.environ['TF_DETERMINISTIC_OPS'] = '1'


set_seed()

# Matplotlib defaults shared by every figure in the notebook
axes_defaults = dict(
    labelweight='bold',
    labelsize='large',
    titleweight='bold',
    titlesize=18,
    titlepad=10,
)
plt.rc('figure', autolayout=True)
plt.rc('axes', **axes_defaults)
plt.rc('image', cmap='magma')

# Silence warnings to keep the output cells clean.
warnings.filterwarnings("ignore")


# Labels are cast to strings before being handed to flow_from_dataframe
# with class_mode="sparse".
train_labels['label'] = train_labels['label'].astype('str')

# Random augmentations applied to the training subset only.
augmentation = dict(
    rotation_range=45,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    shear_range=0.2,
    height_shift_range=0.1,
    width_shift_range=0.1,
)

# Both generators rescale pixel values by 1/255 and use the same 80/20 split.
train_datagen = ImageDataGenerator(validation_split=0.2,
                                   rescale=1. / 255,
                                   **augmentation)
test_datagen = ImageDataGenerator(validation_split=0.2,
                                  rescale=1. / 255)

# Options common to the training and validation iterators.
flow_options = dict(
    directory=os.path.join(WORK_DIR, "train_images"),
    x_col="image_id",
    y_col="label",
    target_size=(TARGET_SIZE, TARGET_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="sparse",
)

train_generator = train_datagen.flow_from_dataframe(train_labels,
                                                    subset="training",
                                                    **flow_options)

test_generator = test_datagen.flow_from_dataframe(train_labels,
                                                  subset="validation",
                                                  **flow_options)


In [None]:
# Bar chart of the number of training images per class.
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(10, 4))

# Keep only the bottom spine, drawn in black.
for side in ['top', 'right', 'left']:
    ax.spines[side].set_visible(False)
ax.spines['bottom'].set_color('black')

# Materialise the reversed palette as a list: a `reversed` iterator can be
# consumed only once and has no length.
class_palette = list(reversed(sns.color_palette("Spectral", 5)))

# Pass the column as `x=`: newer seaborn releases take `data` as the first
# positional argument. `ax=ax` ties the plot to the Axes styled above.
sns.countplot(x=train_labels.label, edgecolor='black',
              palette=class_palette, ax=ax)
plt.xlabel('Classes', fontfamily='serif', size=15)
plt.ylabel('Count', fontfamily='serif', size=15)
plt.xticks(fontfamily='serif', size=12)
plt.yticks(fontfamily='serif', size=12)
ax.grid(axis='y', linestyle='--', alpha=0.9)
plt.show()

# Building the model

In [None]:
# Baseline CNN: three convolution + max-pooling stages followed by a dense
# classifier head with a 5-way softmax output.
#
# The model is built with the public `keras` / `layers` aliases imported at
# the top of the notebook rather than the private `tensorflow.python.keras`
# classes, so it comes from the same Keras implementation as the
# `tf.keras` optimizer used when compiling.
cnn = keras.Sequential([
    # Stage 1: 5x5 convolution on the RGB input image.
    layers.Conv2D(filters=32, kernel_size=5, padding="same",
                  input_shape=(TARGET_SIZE, TARGET_SIZE, 3),
                  activation='relu'),
    layers.MaxPooling2D(pool_size=(3, 3)),

    # Stage 2
    layers.Conv2D(filters=64, kernel_size=3, padding="same",
                  activation='relu'),
    layers.MaxPooling2D(pool_size=(3, 3)),

    # Stage 3
    layers.Conv2D(filters=128, kernel_size=3, padding="same",
                  activation='relu'),
    layers.MaxPooling2D(pool_size=(3, 3)),

    # Classifier head
    layers.Flatten(),
    layers.Dense(2048, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(5, activation='softmax'),
])

## Compile and summary

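The `sparse_categorical` versions of the loss and metric are used because this is a multi-class task (five classes) whose labels are supplied as integer class indices rather than one-hot vectors (the generators use `class_mode = "sparse"`).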

In [None]:
# Adam optimizer with the learning rate from the parameters cell.
adam = keras.optimizers.Adam(learning_rate=LR)

cnn.compile(optimizer=adam,
            loss='sparse_categorical_crossentropy',
            metrics=['sparse_categorical_accuracy'])

# Print the layer-by-layer overview of the compiled model.
cnn.summary()

## Fit Model

In [None]:
# Train the model. `Model.fit` accepts Keras data generators directly;
# `fit_generator` is deprecated.
# Each epoch draws STEPS training batches and VALID_STEPS validation batches.
# No callbacks are passed, so training always runs for the full EPOCHS.
history = cnn.fit(
    train_generator,
    steps_per_epoch=STEPS,
    epochs=EPOCHS,
    validation_data=test_generator,
    validation_steps=VALID_STEPS,
)

### Save Model

In [None]:
# Write the full model and, separately, only its weights as HDF5 files.
model_path = 'Model_3.h5'
weights_path = 'W_Model_3.h5'

cnn.save(model_path)
cnn.save_weights(weights_path)

In [None]:
# Plot loss (top panel) and accuracy (bottom panel) for training vs. validation.
curves = history.history
panels = [
    ('loss', 'val_loss', 'loss', 211),
    ('sparse_categorical_accuracy', 'val_sparse_categorical_accuracy', 'accuracy', 212),
]
for train_key, valid_key, panel_title, subplot_code in panels:
    display_training_curves(
        curves[train_key],
        curves[valid_key],
        panel_title,
        subplot_code,
    )

**Work in progress ...**