# Cassava disease classification (keras)

# Import libs

In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import os
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.utils import shuffle
from sklearn.utils import class_weight
from sklearn.preprocessing import minmax_scale
import random
import cv2
from imgaug import augmenters as iaa
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout, Activation, Input, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.experimental import CosineDecay
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.layers.experimental.preprocessing import RandomCrop,CenterCrop, RandomRotation

# csv file

In [None]:
# Directory that holds the training images; the image_id column of train.csv
# is appended to this prefix to build each file path.
img_folder = '../input/cassava-leaf-disease-classification/train_images/'

In [None]:
# Read the label table, shuffle its rows with a fixed seed so the later
# positional train/validation split is reproducible, and attach the full path
# of every image file.
samples_data = shuffle(
    pd.read_csv("../input/cassava-leaf-disease-classification/train.csv"),
    random_state=42,
)
samples_data = samples_data.assign(
    filepath=img_folder + samples_data["image_id"]
)
samples_data.head()


In [None]:
# Hyper-parameters shared by the input pipeline and the model.
batch_size = 8
img_size = 512
input_shape = (img_size, img_size, 3)
dropout = 0.4

# Fraction of the (already shuffled) table used for training; the remainder
# is used for validation.
training_percen = 0.8
training_length = int(len(samples_data) * training_percen)
validation_item_count = len(samples_data) - training_length

# Positional split: first `training_length` rows train, the rest validate.
training_df = samples_data.iloc[:training_length]
validation_df = samples_data.iloc[training_length:]


# Build datasets of image paths and labels

In [None]:
# Datasets of (file path, label) pairs. Only paths and labels are stored
# here; the image files themselves are not read at this point.
training_data = tf.data.Dataset.from_tensor_slices(
    (training_df["filepath"].values, training_df["label"].values)
)
validation_data = tf.data.Dataset.from_tensor_slices(
    (validation_df["filepath"].values, validation_df["label"].values)
)

# Load images from path

In [None]:
def load_image_and_label_from_path(image_path, label):
    """Read a JPEG file and decode it to a 3-channel image tensor.

    Args:
        image_path: Path of the JPEG file to read.
        label: Label associated with the image; passed through unchanged.

    Returns:
        Tuple of (decoded image tensor, label). The image is not resized
        or rescaled here.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return decoded, label

# Let tf.data pick the parallelism / prefetch depth at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Shuffle the lightweight (path, label) pairs *before* decoding. Shuffling
# after the map would keep up to `buffer_size` fully decoded images in memory,
# and a buffer smaller than the dataset only shuffles locally. Using the full
# training-set length gives a uniform shuffle at negligible memory cost.
shuffled_training_paths = training_data.shuffle(buffer_size=len(training_df))

# Decoded (image, label) datasets, kept under their original names.
training_data = training_data.map(load_image_and_label_from_path, num_parallel_calls=AUTOTUNE)
validation_data = validation_data.map(load_image_and_label_from_path, num_parallel_calls=AUTOTUNE)

training_data_batches = (
    shuffled_training_paths
    .map(load_image_and_label_from_path, num_parallel_calls=AUTOTUNE)
    .batch(batch_size)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation metrics do not depend on sample order, so the validation set is
# batched without a shuffle buffer.
validation_data_batches = validation_data.batch(batch_size).prefetch(buffer_size=AUTOTUNE)

# Rescaling images

In [None]:
# Dataset of the training image paths only (no labels).
adapt_data = tf.data.Dataset.from_tensor_slices(training_df.filepath.values)
def adapt_mode(image_path):
    """Read a JPEG file and return it rescaled by a factor of 1/255.

    Args:
        image_path: Path of the JPEG file to read.

    Returns:
        The decoded 3-channel image after passing through a Rescaling layer
        with scale 1/255.
    """
    decoded = tf.image.decode_jpeg(tf.io.read_file(image_path), channels=3)
    rescale = layers.experimental.preprocessing.Rescaling(1.0 / 255)
    return rescale(decoded)

# Decode and rescale the images in parallel, then shuffle, batch and prefetch.
# NOTE(review): no other cell in this notebook appears to consume
# adapt_data_batches -- confirm whether this pipeline is still needed.
adapt_data = adapt_data.map(adapt_mode, num_parallel_calls=AUTOTUNE)
adapt_data_batches = adapt_data.shuffle(buffer_size=1000).batch(batch_size).prefetch(buffer_size=AUTOTUNE)

# Data Augmentation 



In [None]:
# Augmentation pipeline placed in front of the backbone: a random
# img_size x img_size crop, random horizontal/vertical flips, random rotation
# (factor 0.25), random zoom (height factor range (-0.2, 0)) and random
# contrast (factor (0.2, 0.2)).
augmentation = tf.keras.Sequential()
augmentation.add(layers.experimental.preprocessing.RandomCrop(height=img_size, width=img_size))
augmentation.add(layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"))
augmentation.add(layers.experimental.preprocessing.RandomRotation(0.25))
augmentation.add(layers.experimental.preprocessing.RandomZoom((-0.2, 0)))
augmentation.add(layers.experimental.preprocessing.RandomContrast((0.2, 0.2)))

# Build model: transfer learning (EfficientNetB3)

In [None]:
# EfficientNetB3 backbone without its classification head, initialised from a
# local weights file.
efficientnet = EfficientNetB3(weights="../input/keras-efficientnetb3-no-top-weights/efficientnetb3_notop.h5",
                              include_top=False,
                              input_shape=input_shape,
                              drop_connect_rate=dropout)

# Functional graph: input -> augmentation -> backbone -> global average
# pooling -> dropout -> 5-way softmax.
# The intermediate tensors get their own names so that `efficientnet` (the
# backbone model) and `dropout` (the float rate) are not overwritten; rebinding
# them made this cell fail when executed a second time.
# NOTE(review): the training pipeline feeds decoded images without resizing;
# confirm that their size is compatible with `input_shape`.
inputs = Input(shape=input_shape)
augmented = augmentation(inputs)
backbone_features = efficientnet(augmented)
pooling = layers.GlobalAveragePooling2D()(backbone_features)
regularized = layers.Dropout(dropout)(pooling)
outputs = Dense(5, activation="softmax")(regularized)
model = Model(inputs=inputs, outputs=outputs)

model.summary()

In [None]:
# Number of training epochs; also used to size the cosine learning-rate
# schedule.
epochs = 3

# callbacks

In [None]:

# Total number of optimizer steps over the whole run. Batching without
# drop_remainder yields ceil(n / batch_size) batches per epoch (the last,
# partial batch still triggers a step), so ceil is used rather than round.
decay_steps = int(np.ceil(len(training_df) / batch_size)) * epochs

# Cosine learning-rate schedule starting at 1e-4; alpha=0.3 sets the final
# learning rate to 30% of the initial value.
cosine_decay = CosineDecay(initial_learning_rate=1e-4, decay_steps=decay_steps, alpha=0.3)

# Keep only the checkpoint with the lowest validation loss.
callbacks = [ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True)]

# Labels are integer class ids, hence the sparse categorical loss.
model.compile(loss="sparse_categorical_crossentropy", optimizer=tf.keras.optimizers.Adam(cosine_decay), metrics=["accuracy"])

# Training Model

In [None]:

# Train on the batched training set, evaluating on the validation batches
# after every epoch and checkpointing through `callbacks`.
history = model.fit(
    x=training_data_batches,
    validation_data=validation_data_batches,
    epochs=epochs,
    callbacks=callbacks,
)

In [None]:
# Display the per-epoch metrics recorded by model.fit.
history.history

# Plotting

## Accuracy graph

In [None]:
# Plot training vs. validation accuracy per epoch.
history_dict = history.history

acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']

# Use a dedicated name for the x axis: rebinding `epochs` would replace the
# integer epoch count used by the training cells with a range object.
epoch_axis = range(1, len(acc) + 1)
plt.plot(epoch_axis, acc, label='train_Accuracy', color='red')
plt.plot(epoch_axis, val_acc, label='Val_accuracy', color='green')

plt.title('Accuracy ~ Epochs graph', fontsize=20)
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

## Loss graph

In [None]:
# Plot training vs. validation loss per epoch.
history_dict = history.history

loss = history_dict['loss']
val_loss = history_dict['val_loss']

# The x axis is sized from `loss` itself so this cell does not rely on the
# `acc` list created by the accuracy cell, and a dedicated name is used so the
# integer `epochs` used by the training cells is not overwritten.
epoch_axis = range(1, len(loss) + 1)
# The legend entries name the loss curves (they previously said "Accuracy").
plt.plot(epoch_axis, loss, label='train_loss', color='red')
plt.plot(epoch_axis, val_loss, label='val_loss', color='green')

plt.title('Loss ~ Epochs graph', fontsize=20)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# load model

In [None]:
# Restore the weights from the checkpoint file written by ModelCheckpoint
# (the epoch with the lowest validation loss).
model.load_weights("best_model.h5")

## scan and augmentation

In [None]:
def scan_img(img_path, crop_size=512):
    """Return the four corner crops of an image.

    Args:
        img_path: Path of the image file to open.
        crop_size: Side length, in pixels, of each square crop.

    Returns:
        Array of shape (4, crop_size, crop_size, 3) holding the top-left,
        top-right, bottom-left and bottom-right crops, in that order.

    Raises:
        ValueError: If the image is smaller than ``crop_size`` in either
            dimension.
    """
    # The context manager closes the underlying file handle deterministically.
    with Image.open(img_path) as img:
        # PIL reports size as (width, height), whereas the pixel array below
        # is indexed as [row (height), column (width), channel].
        img_width, img_height = img.size
        if img_width < crop_size or img_height < crop_size:
            raise ValueError(
                f"image {img_path!r} is {img_width}x{img_height}, "
                f"smaller than the requested crop size {crop_size}"
            )
        # Force three channels, matching decode_jpeg(channels=3) used for
        # the training images.
        pixels = np.array(img.convert("RGB"))

    row_origins = (0, img_height - crop_size)
    col_origins = (0, img_width - crop_size)
    crops = [
        pixels[row:row + crop_size, col:col + crop_size, :]
        for row in row_origins
        for col in col_origins
    ]
    return np.array(crops)



# Augmentations applied to each crop at prediction time: random
# horizontal/vertical flips, random zoom (height factor range (-0.2, 0)) and
# random contrast (factor (0.2, 0.2)).
test_augmentation = tf.keras.Sequential()
test_augmentation.add(layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"))
test_augmentation.add(layers.experimental.preprocessing.RandomZoom((-0.2, 0)))
test_augmentation.add(layers.experimental.preprocessing.RandomContrast((0.2, 0.2)))

## predict

In [None]:
def predict(image_filename, folder, TTA_runs=4):
    """Predict the class of one image using corner crops and test-time augmentation.

    The image is split into crops by ``scan_img``; every crop is duplicated
    ``TTA_runs`` times, randomly augmented, and scored by ``model``. The
    predicted probabilities of all augmented crops are summed and the class
    with the highest total is returned.

    Args:
        image_filename: File name of the image inside ``folder``.
        folder: Directory containing the image.
        TTA_runs: Number of augmented copies scored per crop.

    Returns:
        Index of the class with the highest summed probability.
    """
    # os.path.join works whether or not `folder` ends with a path separator.
    image_path = os.path.join(folder, image_filename)

    localised_predictions = []
    for local_image in scan_img(image_path):
        # Stack TTA_runs identical copies of the crop into one batch.
        tta_batch = tf.convert_to_tensor(
            np.repeat(local_image[np.newaxis], TTA_runs, axis=0)
        )
        # training=True makes the random layers actually augment; without it
        # the behaviour depends on the layers' implicit training default.
        augmented_images = test_augmentation(tta_batch, training=True)

        crop_predictions = model.predict(augmented_images)
        localised_predictions.append(np.sum(crop_predictions, axis=0))

    global_predictions = np.sum(np.array(localised_predictions), axis=0)
    return np.argmax(global_predictions)



def predictions(image_list, folder):
    """Predict the class of every image in ``image_list``.

    Args:
        image_list: File names of the images to classify.
        folder: Directory containing the images.

    Returns:
        List of predicted class indices, in the same order as ``image_list``.
    """
    # Wrapping the iterable in tqdm advances the progress bar after each
    # image has been processed, and avoids a local variable that shadows
    # this function's own name.
    return [
        predict(image_filename, folder)
        for image_filename in tqdm(image_list)
    ]


In [None]:
from PIL import Image
# Directory with the competition test images and the file names it contains.
test_dir = '../input/cassava-leaf-disease-classification/test_images/'
test_imgs = os.listdir(test_dir)

# NOTE(review): this rebinds the name `predictions` from the function to its
# result list, so executing this cell a second time without re-running the
# function definition fails ('list' object is not callable).
predictions = predictions(test_imgs, test_dir)

# Create the submission file

In [None]:
# One row per test image: its file name and the predicted class index.
submission = pd.DataFrame({'image_id': test_imgs, 'label': predictions})

In [None]:
# Display the submission table for a visual check.
submission

In [None]:
# Write the table without the DataFrame index column.
submission.to_csv('submission.csv', index = False)