# Intro
Welcome to the Cassava Leaf Disease Classification competition.

There are 5 classes (click a name for further information):
* 0: [Cassava Bacterial Blight (CBB)](https://en.wikipedia.org/wiki/Bacterial_blight_of_cassava)
* 1: [Cassava Brown Streak Disease (CBSD)](https://en.wikipedia.org/wiki/Cassava_brown_streak_virus_disease)
* 2: [Cassava Green Mottle (CGM)](https://en.wikipedia.org/wiki/Cassava_green_mottle_virus)
* 3: [Cassava Mosaic Disease (CMD)](https://en.wikipedia.org/wiki/Cassava_mosaic_virus)
* 4: Healthy

We will give a simple starter notebook based on a CNN.

# Libraries

In [None]:
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt

In [None]:
from keras.utils import to_categorical, Sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from keras.optimizers import RMSprop,Adam

# Path

In [None]:
# Root directory of the competition data. The trailing slash matters because
# file and folder names are appended to it by plain string concatenation.
path = '/kaggle/input/cassava-leaf-disease-classification/'
# List what the data directory contains.
os.listdir(path)

# Load Data

In [None]:
# Training table (image ids with their labels) and the submission template.
# `path` already ends with a separator, so joining yields the same file paths
# as direct concatenation.
train_csv = os.path.join(path, 'train.csv')
submission_csv = os.path.join(path, 'sample_submission.csv')
train_data = pd.read_csv(train_csv)
samp_subm = pd.read_csv(submission_csv)

# EDA

In [None]:
# Compare the number of labelled rows with the number of image files on disk.
n_rows = len(train_data)
print('number of train data:', n_rows)
train_files = os.listdir(path+'train_images/')
print('number of train images:', len(train_files))
test_files = os.listdir(path+'test_images/')
print('number of test images:', len(test_files))

Distribution of the labels:

In [None]:
# Use one bin per class. With bins=4 over the label range 0..4 the last bin
# covers both 3 and 4, so the counts of two classes would be merged.
train_data['label'].hist(bins=5)

Plot an image:

In [None]:
# OpenCV loads images with channels in BGR order while matplotlib interprets
# arrays as RGB; convert before plotting so the colours are not swapped.
img = cv2.imread(path+'train_images/'+'1000015157.jpg')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img)
plt.show()

# Prepare Data For Model

In [None]:
# Number of samples per training batch.
batch_size = 64
# Images are resized to img_size x img_size pixels before entering the model.
img_size = 64
# Number of colour channels per image.
img_channel = 3

## Train Labels And Class Weights

In [None]:
# One-hot encode the integer class labels of the training table.
y_train = to_categorical(train_data['label'])

In [None]:
#class_weight = dict(zip(range(0, 7), (train_data['label'].value_counts()/len(train_data))))

## Image Data Generator

In [None]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` that reads images from disk and yields standardised batches.

    Every batch is a pair ``(X, y)``. ``X`` holds the images resized to
    ``img_size x img_size`` and standardised with the batch's own mean and
    standard deviation; ``y`` holds the label rows of the same samples.

    Args:
        path: Directory prefix concatenated with each image id, so it must
            end with a path separator.
        list_IDs: Image file names, indexable by position ``0 .. len - 1``.
        labels: Labels indexable by the same positions as ``list_IDs``. Each
            entry must broadcast to a row of ``NUM_CLASSES`` values.
        batch_size: Number of samples per batch.
        img_size: Side length in pixels of the square resized images.
        img_channel: Number of colour channels of the loaded images.
    """

    # Width of one label row in ``y``.
    NUM_CLASSES = 5

    def __init__(self, path, list_IDs, labels, batch_size, img_size, img_channel):
        # Give the Keras base class the chance to initialise its own state.
        super().__init__()
        self.path = path
        self.list_IDs = list_IDs
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.img_channel = img_channel
        # Positions of all samples; batches are contiguous slices of this array.
        self.indexes = np.arange(len(self.list_IDs))

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` tuple."""
        start = index * self.batch_size
        batch_indexes = self.indexes[start:start + self.batch_size]
        return self.__data_generation(batch_indexes)

    def __data_generation(self, batch_indexes):
        """Load, resize and standardise the samples at ``batch_indexes``.

        Args:
            batch_indexes: Dataset positions of the samples in this batch.

        Returns:
            Tuple ``(X, y)``: a float32 image array and an integer label array,
            both with one row per requested sample.

        Raises:
            FileNotFoundError: If an image cannot be read from disk.
        """
        n_samples = len(batch_indexes)
        X = np.empty(
            (n_samples, self.img_size, self.img_size, self.img_channel),
            dtype='float32',
        )
        y = np.empty((n_samples, self.NUM_CLASSES), dtype=int)
        for row, k in enumerate(batch_indexes):
            image_file = self.path + self.list_IDs[k]
            data_file = cv2.imread(image_file)
            # cv2.imread signals an unreadable file by returning None rather
            # than raising; fail here with a message that names the file.
            if data_file is None:
                raise FileNotFoundError(f'Could not read image: {image_file}')
            X[row] = cv2.resize(data_file, (self.img_size, self.img_size))
            # Look the label up by the sample's dataset position, not by its
            # row inside the batch; otherwise every batch would receive the
            # labels of the first `batch_size` samples.
            y[row] = self.labels[k]
        # Standardise with the statistics of this batch.
        X -= X.mean()
        spread = X.std()
        # A constant batch has zero spread; skip the division instead of
        # filling X with NaN/inf.
        if spread > 0:
            X /= spread
        return X, y

# Define Model

In [None]:
# Small baseline CNN: one strided convolution, batch normalisation and max
# pooling, followed by two dense layers, dropout and a 5-way softmax output.
model = Sequential([
    Conv2D(
        128,
        kernel_size=5,
        strides=4,
        activation='relu',
        input_shape=(img_size, img_size, img_channel),
    ),
    BatchNormalization(),
    MaxPool2D(pool_size=4),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(5, activation='softmax'),
])

In [None]:
# `learning_rate` is the supported name of the optimizer argument; the `lr`
# alias is deprecated.
model.compile(optimizer=Adam(learning_rate=1e-3), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Print the layer-by-layer overview of the baseline CNN.
model.summary()

# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic-regression classifier using all CPU cores with progress output.
# NOTE(review): it is only instantiated here; no visible cell fits or uses it.
clf_log = LogisticRegression(n_jobs=-1, verbose=1)

# Train Model

In [None]:
# Batch generator over the whole training set: image ids paired with their
# one-hot labels.
train_generator = DataGenerator(
    path=path+'train_images/',
    list_IDs=train_data['image_id'],
    labels=y_train,
    batch_size=batch_size,
    img_size=img_size,
    img_channel=img_channel,
)

## Hyperparameter tuning

In [None]:
#@title Stratified K-fold with 40 splits, shuffle=True and random_state=42. Sizes of the training and validation parts of the first fold:
from sklearn.model_selection import StratifiedKFold
splitter = StratifiedKFold(n_splits=40, shuffle=True, random_state=42)
# Each entry of `folds` is a (train indices, validation indices) pair.
folds = list(splitter.split(train_data['image_id'], train_data['label']))
first_train_idx, first_val_idx = folds[0]
print("Training split: {}".format(len(first_train_idx)))
print("Validation split: {}".format(len(first_val_idx)))

In [None]:
# Pre-allocate the image and label arrays for the validation part of the
# first fold; they are filled by the loading loop that follows.
n_val = len(y_train[folds[0][1]])
X = np.empty((n_val, 64, 64, 3))
y = np.empty((n_val, 5), dtype=int)

In [None]:
# Fill the pre-allocated X / y arrays with the validation samples of the
# first fold, then standardise X with its overall mean and standard deviation.
val_idx = folds[0][1]
# Select the fold's labels once; indexing y_train inside the loop would copy
# the whole fold again on every iteration.
val_labels = y_train[val_idx]
for i, ID in enumerate(train_data['image_id'][val_idx]):
    image_file = path+"train_images/"+ID
    data_file = cv2.imread(image_file)
    # cv2.imread returns None for an unreadable file; fail with a clear message.
    if data_file is None:
        raise FileNotFoundError(f'Could not read image: {image_file}')
    img = cv2.resize(data_file, (64,64), interpolation = cv2.INTER_AREA)
    X[i, ] = img
    y[i, ] = val_labels[i]
X = X.astype('float32')
X -= X.mean()
X /= X.std()

In [None]:
from kerastuner.applications import HyperResNet
from kerastuner.tuners import BayesianOptimization, Hyperband

# Tunable ResNet family for 64x64 three-channel inputs and five classes.
hypermodel = HyperResNet(input_shape=(64, 64, 3), classes=5)

# Bayesian search over the hypermodel, ranked by validation accuracy.
# Each of the 50 trials is executed 3 times; results are written under
# /kaggle/working/my_dir.
tuner = BayesianOptimization(
    hypermodel,
    objective='val_accuracy',
    max_trials=50,
    executions_per_trial=3,
    directory='/kaggle/working/my_dir',
    project_name='tuning',
)


In [None]:
# Run the hyperparameter search on the in-memory fold, holding out 20% of it
# for validation and training for 10 epochs.
tuner.search(X,y,validation_split=0.2, verbose=1, epochs=10)

In [None]:
# Show a summary of the search
tuner.results_summary()

# Retrieve the best model.
# get_best_models returns a list; with num_models=1 its only element is taken.
best_model = tuner.get_best_models(num_models=1)[0]

# Evaluate the best model.
# NOTE(review): the names used below are not defined in any visible cell.
#results = best_model.evaluate(X_combined_test_gridsearch, y_test_gridsearch)

In [None]:
# Continue training the tuned model on the full training set.
# `fit` accepts a Sequence directly; `fit_generator` is deprecated.
history = best_model.fit(train_generator,
                         #class_weight = class_weight,
                         workers=4, verbose = 0, epochs=10
                        )

# Predict Test Data

In [None]:
# Generator over the test images. A batch size of 1 means the generator's
# floor-based length drops no image. The labels passed here are the values
# from the sample submission, which a later cell overwrites with predictions.
test_generator = DataGenerator(path+'test_images/', samp_subm['image_id'], samp_subm['label'], 1, img_size, img_channel)

In [None]:
# Class probabilities for every test image.
# `predict` accepts a Sequence directly; `predict_generator` is deprecated.
predict = best_model.predict(test_generator, verbose=1)

In [None]:
# The predicted class is the index of the highest score in each row.
samp_subm['label'] = predict.argmax(axis=1)

# Export Data

In [None]:
# Write the submission file without the DataFrame index column.
samp_subm.to_csv('submission.csv', index=False)