In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily collect the full path of every file below the Kaggle input directory,
# then print them one per line.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Cassava Plant Disease Classification with TensorFlow/Keras, OpenCV etc.

Reference: https://www.kaggle.com/homiarafarhana/cassava-2nd#Data-Agumentation-and-Pre-Processing

Feel free to leave comments on how I can improve this notebook or on any mistakes I made!!

This notebook is suitable for dummies/beginners, as I am a beginner myself!

In this notebook there are a few sections, which you can see in the tab on the right.

Basically, this is a classification task that uses:
* Tensorflow/Keras
    * Deep Learning stuff(model building etc.)
    * Data Augmentation with `ImageDataGenerator`
    * TensorBoard to visualize model's performance.
* OpenCV
    * Displaying pictures from dataset
* Matplotlib 
    * Visualize augmented images
    * Illustrate model's performance

In the last code block, under **"Making Predictions/Submission"**, you may find a useful code block showing how to commit your notebook with a pre-trained model.

Do upvote if you find this helpful! Thanks in advance!

# Packages needed

In [None]:
import glob
import shutil
import cv2
import os
from keras_preprocessing import image
from keras_preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Dropout, Activation, GlobalAveragePooling2D, Dense
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Accuracy
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras import Input

%matplotlib inline
plt.rcParams["figure.figsize"] = (17, 6) # (w, h)

### Directories

Original Dataset

In [None]:
# Root folder of the competition dataset; the three dataset paths are built from it.
_COMPETITION_ROOT = "../input/cassava-leaf-disease-classification"

TRAINING_DIR = _COMPETITION_ROOT + "/train_images/"   # folder holding the training images
TRAINING_CSV = _COMPETITION_ROOT + "/train.csv"       # table with image_id / label columns
JSON_LABELS = _COMPETITION_ROOT + "/label_num_to_disease_map.json"
# ResNet50 weight file. NOTE(review): no cell reviewed here reads this constant
# (the backbone used below is InceptionResNetV2) - confirm whether it is still needed.
PRETRAINED_MODEL = "../input/resnet50/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5"

In [None]:
# Show which datasets are mounted under ../input.
# (The bare os.listdir() call that used to precede this line discarded its
# result, so a single printed listing is all that is needed.)
print(os.listdir("../input"))

In [None]:
# Load the training table and convert the label column to strings in one step;
# the string labels are needed for Keras flow_from_dataframe.
train_df = pd.read_csv(TRAINING_CSV).astype({"label": "string"})

In [None]:
# Peek at the first five rows of the training table.
train_df.head(5)

In [None]:
# Expected sizes of the subsets produced by ImageDataGenerator(validation_split=0.2).
# The validation subset is sized first as int(fraction * n) and training receives
# the remainder. This mirrors how keras_preprocessing slices the dataframe
# (validation = first int(split * n) rows); truncating n * 0.8 for the training
# subset instead can be off by one image.
validation_fraction = 0.2  # must match validation_split of the data generators
total_images_count = len(train_df.index)
total_val_img_count = int(validation_fraction * total_images_count)
total_train_img_count = total_images_count - total_val_img_count
print("Expected images counts:")
print("\nTotal Images from original directory: {}".format(total_images_count))
print("Training Images: {}".format(total_train_img_count))
print("Validation Images: {}".format(total_val_img_count))


In [None]:
# Read the label JSON (presumably label number -> disease name, judging by the
# file name) and flatten its values into a plain Python list.
# Despite its name, `label_df` ends up holding a list, not a DataFrame.
label_df = (
    pd.read_json(JSON_LABELS, orient = 'index')
    .values
    .flatten()
    .tolist()
)
label_df

In [None]:
# One sub-frame per class label ("0" .. "4"); labels are strings at this point.
(
    train_label_0,
    train_label_1,
    train_label_2,
    train_label_3,
    train_label_4,
) = (train_df[train_df["label"] == str(class_id)] for class_id in range(5))
len(train_label_4)

In [None]:
# Number of images per class, in label order 0..4.
len_by_labels = [
    len(label_frame)
    for label_frame in (
        train_label_0,
        train_label_1,
        train_label_2,
        train_label_3,
        train_label_4,
    )
]

# Bar chart of the class distribution, one bar per class name.
plt.bar(label_df, len_by_labels)
plt.ylabel('Image Count')
plt.xlabel('Labels')
plt.show()

# Displaying some images

In [None]:
# Show a 3x3 grid of randomly chosen training images.
# os.path.join avoids the doubled slash that plain concatenation produced,
# because TRAINING_DIR already ends with "/".
training_images_dir = os.path.join(TRAINING_DIR, "*.jpg")
print(training_images_dir)
training_images = glob.glob(training_images_dir)

# Draw all paths in a single call (the list is converted to an array once
# instead of once per image) and without replacement, so no image repeats.
sample_size = min(9, len(training_images))
sampled_image_paths = np.random.choice(training_images, size=sample_size, replace=False)

plt.figure(figsize=(12, 12))
for i, training_image in enumerate(sampled_image_paths, start=1):
    training_image_BGR = cv2.imread(str(training_image))
    if training_image_BGR is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded.
        print("Could not read image: {}".format(training_image))
        continue
    # OpenCV loads channels as BGR; reverse the last axis to get RGB for matplotlib.
    training_image_RGB = training_image_BGR[..., ::-1]
    print(training_image_RGB.shape)
    plt.subplot(3, 3, i)
    plt.imshow(training_image_RGB)
    plt.axis('off')

# Data Pre-processing

- Data augmentation etc.

In [None]:
# Options shared by both generators: multiply pixel values by 1/255 and
# reserve 20% of the rows for the validation subset.
shared_generator_options = {
    "rescale": 1/255,
    "validation_split": 0.2,
}

# Training generator: random rotation, shift, shear, zoom, brightness and flips
# on top of the shared options.
training_datagen = ImageDataGenerator(
    rotation_range = 100,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.3,
    brightness_range = [0.7, 1.4],
    horizontal_flip = True,
    vertical_flip = True,
    fill_mode = "nearest",
    **shared_generator_options
)

# Validation generator: no augmentation, only the shared options.
validation_datagen = ImageDataGenerator(**shared_generator_options)

In [None]:
BATCH_SIZE = 24
IMG_WIDTH = 300
IMG_HEIGHT = 300
CHANNEL = 3

# Arguments common to the training and validation iterators.
# Keras documents target_size as (height, width), so the height goes first;
# with the current square 300x300 setting the result is unchanged.
common_flow_options = {
    "x_col": "image_id",
    "y_col": "label",
    "target_size": (IMG_HEIGHT, IMG_WIDTH),
    "class_mode": "categorical",
    "batch_size": BATCH_SIZE,
}

print("\nTraining Dataset")
# Augmented, shuffled batches drawn from the "training" part of the split.
train_ds = training_datagen.flow_from_dataframe(
    train_df,
    TRAINING_DIR,
    shuffle = True,
    subset = "training",
    **common_flow_options
)
print("\nValidation Dataset")
# Un-augmented batches in fixed order drawn from the "validation" part of the split.
validation_ds = validation_datagen.flow_from_dataframe(
    train_df,
    TRAINING_DIR,
    shuffle = False,
    subset = "validation",
    **common_flow_options
)
print("\nClass Indices:")
print(train_ds.class_indices)

# Display Augmented Data

In [None]:
# Preview augmented samples: each grid cell pulls a fresh batch from the
# training iterator and shows the first image of that batch.
plt.figure(figsize = (12, 12))

for grid_position in range(1, 10):
    batch_images, _batch_labels = next(train_ds)
    plt.subplot(3, 3, grid_position)
    plt.imshow(batch_images[0])
    plt.axis("Off")

# Callbacks

In [None]:
class theCallBacks(Callback):
    """Stop training once training AND validation accuracy exceed a threshold.

    Args:
        threshold: value that both "accuracy" and "val_accuracy" must exceed
            before training is cancelled. Defaults to 0.92.
    """

    def __init__(self, threshold=0.92):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check the epoch's metrics and request a stop when both pass the threshold."""
        # Avoid a shared mutable default and tolerate logs=None.
        logs = logs or {}
        train_accuracy = logs.get("accuracy")
        val_accuracy = logs.get("val_accuracy")
        # A metric can be absent (e.g. no validation data); comparing None with
        # a float would raise TypeError, so treat it as "threshold not reached".
        if train_accuracy is None or val_accuracy is None:
            return
        if val_accuracy > self.threshold and train_accuracy > self.threshold:
            # "\n" replaces the invalid escape sequence "\T" of the old message.
            print(
                "\nTraining Accuracy> {0} & Validation Accuracy> {0}\nCancelling training!".format(
                    self.threshold
                )
            )
            self.model.stop_training = True


callback_on_metrics = theCallBacks() #Instantiate theCallBacks

# Halve the learning rate when val_loss has not improved for 2 epochs,
# wait 1 epoch before monitoring again, and never go below 1e-4.
reduce_lr = ReduceLROnPlateau(
                    monitor='val_loss', 
                    factor=0.5,
                    patience= 2, 
                    verbose = 1,
                    cooldown = 1,
                    min_lr=0.0001)

# Defining Loss Function, Optimizer, Desired Metrics

In [None]:
# Optimizer and loss later handed to model.compile():
# Adam starting at a learning rate of 1e-3, and categorical cross-entropy.
optimizer = Adam(learning_rate=1e-3)
loss_func = CategoricalCrossentropy()

### Model Checkpoint

In [None]:
# Write the model to disk only when validation accuracy reaches a new maximum.
model_checkpoint_path = "./cassava_Model.h5"
checkpoint = ModelCheckpoint(
    filepath=model_checkpoint_path,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# TensorBoard
Enable this if you want to use TensorBoard.

In [None]:
# class LearningRateLogger(Callback):
#     def __init__(self):
#         super().__init__()
#         self._supports_tf_logs = True

#     def on_epoch_end(self, epoch, logs=None):
#         if logs is None or "learning_rate" in logs:
#             return
#         logs["learning_rate"] = self.model.optimizer.lr
        
#log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)     

# source: https://stackoverflow.com/questions/49127214/keras-how-to-output-learning-rate-onto-tensorboard

### Load TensorBoard with below line!

In [None]:
# %load_ext tensorboard
# %tensorboard --logdir logs

# Model building

Transfer learning model with InceptionResNetV2.

In [None]:
# Input tensor for the backbone. Keras image tensors are laid out as
# (height, width, channels), so height goes first; with the current square
# 300x300 setting the resulting shape is unchanged.
new_input = Input(shape=(IMG_HEIGHT, IMG_WIDTH, CHANNEL))

In [None]:
# InceptionResNetV2 backbone: ImageNet weights, no classification head,
# fed by the notebook's own input tensor.
base_model = InceptionResNetV2(
    input_tensor=new_input,
    weights="imagenet",
    include_top=False,
)
# Keep the whole backbone trainable.
base_model.trainable = True

### Unfreeze certain layers/block(s)


The code block below freezes all layers except the last block of InceptionResNetV2. It is currently commented out, so every layer stays trainable.

In [None]:
# for layer in base_model.layers[:143]:
#     layer.trainable = False
#    

# for i, layer in enumerate(base_model.layers):
#     print(i,layer.name , "-->", layer.trainable)

### Model Building

In [None]:
def create_model(num_classes=5, dense_units=256, dropout_rate=0.5):
    """Build the classifier: backbone followed by a small dense head.

    The layers are, in order: the module-level `base_model`, batch
    normalization, global average pooling, dropout, a ReLU dense layer and a
    softmax output layer.

    Args:
        num_classes: number of output classes (units of the softmax layer).
        dense_units: width of the hidden dense layer.
        dropout_rate: dropout rate applied after pooling.

    Returns:
        An uncompiled `Sequential` model. Calling the function without
        arguments yields the same architecture as before (5 / 256 / 0.5).
    """
    return Sequential([
        base_model,
        BatchNormalization(),
        GlobalAveragePooling2D(),
        Dropout(dropout_rate),
        Dense(dense_units, activation = "relu"),
        Dense(num_classes, activation = "softmax"),
    ])

In [None]:
# Build the network, attach loss / optimizer / accuracy metric, then print
# the layer overview.
model = create_model()
model.compile(
    loss = loss_func,
    optimizer = optimizer,
    metrics = ["accuracy"],
)
model.summary()

### Train the model

In [None]:
# Batches per epoch: the number of full batches in the expected training
# subset (a trailing partial batch is not counted).
steps_per_epoch = total_train_img_count // BATCH_SIZE
# Maximum number of epochs to train for.
num_epochs = 10

In [None]:
# Callbacks applied during training: learning-rate reduction, the accuracy
# based stop, and best-model checkpointing.
training_callbacks = [
    reduce_lr,
    callback_on_metrics,
    #LearningRateLogger(),
    #tensorboard_callback, # Enable these two if using TensorBoard
    checkpoint,
]

# Train on the augmented iterator and evaluate on the validation iterator
# after every epoch; `history` keeps the per-epoch metrics for plotting.
history = model.fit(
    train_ds,
    validation_data = validation_ds,
    epochs = num_epochs,
    steps_per_epoch = steps_per_epoch,
    callbacks = training_callbacks,
    verbose = 1,
)

In [None]:
# The ModelCheckpoint callback already stores the best model (highest
# val_accuracy, save_best_only=True) at ./cassava_Model.h5. Saving the
# last-epoch model to that same path would overwrite the best one, so the
# final state goes to its own file.
model.save("./cassava_Model_last_epoch.h5")


# Illustrates Model Performance

In [None]:
# Per-epoch metrics recorded by model.fit().
acc = history.history["accuracy"]
val_acc = history.history["val_accuracy"]
loss = history.history["loss"]
val_loss = history.history["val_loss"]

epochs = range(len(acc))

# Figure 1: accuracy curves.
plt.figure()
plt.plot(epochs, acc, "r", label="Training Accuracy")
plt.plot(epochs, val_acc, "b", label="Validation Accuracy")
plt.title("Training and Validation Accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()

# Figure 2: loss curves.
plt.figure()
plt.plot(epochs, loss, "r", label="Training Loss")
plt.plot(epochs, val_loss, "b", label="Validation Loss")
plt.title("Training and Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()

# No further plt.figure() here: opening another figure after the last plot
# only produced an empty extra figure.
plt.show()

# Making Predictions/Submission

If you have a model that you trained locally or somewhere else, you can use the code block below for submission purposes. However, you have to upload your model first and change the path accordingly in this line of code:
> model = tf.keras.models.load_model("./cassava_Model.h5")

In [None]:
import tensorflow as tf

# Reload the model from disk so this cell also works with a model that was
# trained elsewhere and uploaded.
model = tf.keras.models.load_model("./cassava_Model.h5")
sample_submission = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')
test_images_dir = '../input/cassava-leaf-disease-classification/test_images/'

predicted = []
# The loop variable is named `image_id` so it no longer shadows the `image`
# module imported from keras_preprocessing at the top of the notebook.
for image_id in sample_submission.image_id:
    # Resize while loading, as flow_from_dataframe does for the training data
    # (plain resize to target_size). smart_resize crops to the target aspect
    # ratio first, which would feed the model differently framed images than
    # it was trained on.
    img = tf.keras.preprocessing.image.load_img(
        os.path.join(test_images_dir, image_id),
        target_size=(IMG_HEIGHT, IMG_WIDTH),
    )
    img = tf.keras.preprocessing.image.img_to_array(img)
    # Add the batch axis and apply the same 1/255 scaling as the generators.
    batch = np.expand_dims(img, axis=0) / 255
    prediction = model.predict(batch)
    # Assumes class index i corresponds to label i (compare the printed
    # train_ds.class_indices) - TODO confirm if the label set changes.
    predicted.append(int(np.argmax(prediction)))

submission = pd.DataFrame({'image_id': sample_submission.image_id, 'label': predicted})
submission.to_csv('submission.csv', index=False)