In [None]:

import glob
import itertools
import json
import os
import shutil
from collections import Counter

import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from PIL import Image
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout
from keras.optimizers import RMSprop, Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers import Dense, Flatten, Dropout, Conv2D, Activation, MaxPooling2D, BatchNormalization
from keras.layers import Add, AveragePooling2D  # needed by main_block / build_model


In [None]:
# Root folder of the competition dataset (relative path).
work_dir = '../input/cassava-leaf-disease-classification/'
# Derive the image folder from work_dir so both paths always refer to the same
# dataset copy instead of mixing a relative and a hard-coded absolute path.
train_path = os.path.join(work_dir, 'train_images')
# Kept as the last expression so the listing is actually rendered by the cell.
os.listdir(work_dir)

In [None]:
# Read the training labels table from the dataset folder and preview it.
train_csv_path = f"{work_dir}train.csv"
data = pd.read_csv(train_csv_path)
data.head()

In [None]:
# Tally how many rows carry each label value.
label_counts = Counter(data['label'])
print(label_counts)

In [None]:
# Show the dtype pandas inferred for each column of the training table.
data.dtypes

In [None]:
# Print the dataset folder contents and keep an alias for its path.
dataset_entries = os.listdir(work_dir)
print(dataset_entries)
data_dir = work_dir


In [None]:
# Bar chart of the number of training images per label.
sns.set_style("dark")
plt.figure(figsize=(10,8))
# The column is passed by keyword: recent seaborn releases make plot variables
# keyword-only and treat the first positional argument as `data`, so the
# positional Series form no longer plots label counts.
sns.countplot(x="label", data=data, edgecolor="black", palette="mako");


In [None]:
df_train = pd.read_csv("../input/cassava-leaf-disease-classification/train.csv")
# Labels are stored as strings: the preview cells filter on "0".."4", and the
# original author intended string labels for categorical cross-entropy.
df_train["label"] = df_train["label"].astype(str)
# Preview comes last; a bare expression in the middle of a cell is never rendered.
df_train.head()



# **0: Cassava Bacterial Blight**

In [None]:
# Show three randomly sampled training images whose label is "0".
path = "../input/cassava-leaf-disease-classification/train_images/"
df0 = df_train[df_train["label"] == "0"]
files = df0["image_id"].sample(3).tolist()

plt.figure(figsize=(15,5))
for slot, file_name in enumerate(files, start=1):
    # One row, three columns; `slot` is the 1-based subplot position.
    plt.subplot(1, 3, slot)
    plt.imshow(Image.open(path + file_name))
    plt.axis("off")

plt.show()


#  **1: Cassava Brown Streak Disease**

In [None]:
# Show three randomly sampled training images whose label is "1".
df1 = df_train[df_train["label"] == "1"]
files = df1["image_id"].sample(3).tolist()

plt.figure(figsize=(15,5))
for slot, file_name in enumerate(files, start=1):
    # One row, three columns; `slot` is the 1-based subplot position.
    plt.subplot(1, 3, slot)
    plt.imshow(Image.open(path + file_name))
    plt.axis("off")

plt.show()


# **2: Cassava Green Mottle**

In [None]:
# Show three randomly sampled training images whose label is "2".
df2 = df_train[df_train["label"] == "2"]
files = df2["image_id"].sample(3).tolist()

plt.figure(figsize=(15,5))
for slot, file_name in enumerate(files, start=1):
    # One row, three columns; `slot` is the 1-based subplot position.
    plt.subplot(1, 3, slot)
    plt.imshow(Image.open(path + file_name))
    plt.axis("off")

plt.show()


# **3: Cassava Mosaic Disease**

In [None]:
# Show three randomly sampled training images whose label is "3".
df3 = df_train[df_train["label"] == "3"]
files = df3["image_id"].sample(3).tolist()

plt.figure(figsize=(15,5))
for slot, file_name in enumerate(files, start=1):
    # One row, three columns; `slot` is the 1-based subplot position.
    plt.subplot(1, 3, slot)
    plt.imshow(Image.open(path + file_name))
    plt.axis("off")

plt.show()


# **4: Healthy**

In [None]:
# Show three randomly sampled training images whose label is "4" (Healthy).
# The original cell filtered on "3" again, so this section displayed the
# previous class instead of healthy leaves.
df4 = df_train[df_train["label"] == "4"]
files = df4["image_id"].sample(3).tolist()

plt.figure(figsize=(15,5))
for slot, file_name in enumerate(files, start=1):
    # One row, three columns; `slot` is the 1-based subplot position.
    plt.subplot(1, 3, slot)
    plt.imshow(Image.open(path + file_name))
    plt.axis("off")

plt.show()


In [None]:
# Re-read the training labels table so `data` starts from the raw CSV again.
labels_csv = os.path.join(work_dir, 'train.csv')
data = pd.read_csv(labels_csv)
data.head()

In [None]:
# Load the label-number -> disease-name mapping shipped with the dataset.
# The context manager closes the file handle, which a bare open() left dangling.
with open(work_dir + 'label_num_to_disease_map.json') as label_file:
    real_labels = {int(k): v for k, v in json.load(label_file).items()}

# Human-readable class name for every row; used as the target column below.
data['class_name'] = data.label.map(real_labels)

# 80/20 split, stratified on the class name so both parts keep the label mix.
train, val = train_test_split(data, test_size = 0.2, random_state = 2, stratify = data['class_name'])

IMG_SIZE = 224
size = (IMG_SIZE,IMG_SIZE)
n_CLASS = 5
BATCH_SIZE = 20

# Training generator: preprocessing plus random geometric augmentation.
datagen = ImageDataGenerator(preprocessing_function = tf.keras.applications.efficientnet.preprocess_input,
                    rotation_range = 40,
                    width_shift_range = 0.2,
                    height_shift_range = 0.2,
                    shear_range = 0.2,
                    zoom_range = 0.2,
                    horizontal_flip = True,
                    vertical_flip = True,
                    fill_mode = 'nearest')

# Validation generator: same preprocessing but NO augmentation, so validation
# metrics are measured on unmodified images and are comparable across epochs.
val_datagen = ImageDataGenerator(preprocessing_function = tf.keras.applications.efficientnet.preprocess_input)

# Options shared by both flows; defined once so the two cannot drift apart.
flow_options = dict(directory = train_path,
                    seed = 42,
                    x_col = 'image_id',
                    y_col = 'class_name',
                    target_size = size,
                    class_mode = 'categorical',
                    interpolation = 'nearest',
                    batch_size = BATCH_SIZE)

train_set = datagen.flow_from_dataframe(train, shuffle = True, **flow_options)

# Shuffling is pointless for evaluation; a fixed order keeps the validation
# batches identical from epoch to epoch.
val_set = val_datagen.flow_from_dataframe(val, shuffle = False, **flow_options)




In [None]:
from keras.layers import ZeroPadding2D, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from keras.layers import GlobalAveragePooling2D, MaxPooling2D
from keras.models import Model, Sequential
from keras.callbacks import ModelCheckpoint
from keras import regularizers


In [None]:
def main_block(x, filters, n, strides, dropout, act="relu"):
    """Build one group of `n` residual units on top of tensor `x`.

    Params:
        - x: input tensor of the group
        - filters: number of filters used by every convolution in the group
        - n: number of residual units; the first one changes width/stride,
             the remaining n-1 are identity-shortcut units
        - strides: strides of the first 3x3 conv and of the 1x1 shortcut conv
        - dropout: dropout rate applied between the two convs of each
                   identity unit; None or 0 disables it
        - act: activation passed to keras `Activation` - default=relu
    Returns the output tensor after a final BatchNormalization + activation.
    """
    # First unit, main path: conv -> BN -> activation -> conv.
    x_res = Conv2D(filters, (3,3), strides=strides, padding="same")(x)
    x_res = BatchNormalization()(x_res)
    x_res = Activation(act)(x_res)
    x_res = Conv2D(filters, (3,3), padding="same")(x_res)
    # First unit, shortcut path: 1x1 conv so channels/stride match the main path.
    x = Conv2D(filters, (1,1), strides=strides)(x)
    # Merge branches
    x = Add()([x_res, x])

    for _ in range(n-1):
        # Residual connection, first half: BN -> activation -> conv.
        x_res = BatchNormalization()(x)
        x_res = Activation(act)(x_res)
        x_res = Conv2D(filters, (3,3), padding="same")(x_res)
        # Dropout must act on the residual branch. The previous code fed `x`
        # here, which silently threw away the convolution just above.
        if dropout:
            x_res = Dropout(dropout)(x_res)
        # Second half: BN -> activation -> conv.
        x_res = BatchNormalization()(x_res)
        x_res = Activation(act)(x_res)
        x_res = Conv2D(filters, (3,3), padding="same")(x_res)
        # Merge branches
        x = Add()([x, x_res])

    # Inter block part
    x = BatchNormalization()(x)
    x = Activation(act)(x)
    return x

def build_model(input_dims, output_dim, n, k, act= "relu", dropout=None):
    """ Builds the model. Params:
        - input_dims: input dimensions for the model
        - output_dim: output dimensions for the model (softmax units)
        - n: number of layers. WRNs are of the form WRN-N-K
             It must satisfy that (N-4)%6 = 0
        - k: Widening factor. WRNs are of the form WRN-N-K
             It must satisfy that K%2 = 0
        - act: activation function - default=relu. Build your custom
               one with keras.backend (ex: swish, e-swish). It is used for
               the head and for every residual group.
        - dropout: dropout rate - default=None, i.e. no dropout
                   (not recommended >0.3)
        Returns an uncompiled keras Model.
        Raises AssertionError when n or k violate the constraints above.
    """
    # Ensure n & k are correct
    assert (n-4)%6 == 0, "n must satisfy (n-4) % 6 == 0"
    assert k%2 == 0, "k must be even"
    # Number of residual units per group.
    n = (n-4)//6
    # This returns a tensor input to the model
    inputs = Input(shape=input_dims)

    # Head of the model
    x = Conv2D(16, (3,3), padding="same")(inputs)
    x = BatchNormalization()(x)
    x = Activation(act)(x)

    # 3 groups of residual units; the last two halve the spatial resolution.
    x = main_block(x, 16*k, n, (1,1), dropout, act) # 0
    x = main_block(x, 32*k, n, (2,2), dropout, act) # 1
    x = main_block(x, 64*k, n, (2,2), dropout, act) # 2

    # Final part of the model. The groups downsample by 4 overall, so the 8x8
    # window collapses a 32x32 input to 1x1; larger inputs keep a bigger map
    # (e.g. 224x224 -> 7x7), which Flatten unrolls before the classifier.
    x = AveragePooling2D((8,8))(x)
    x = Flatten()(x)
    outputs = Dense(output_dim, activation="softmax")(x)

    return Model(inputs=inputs, outputs=outputs)


In [None]:
# WRN-16-4 sized for the generators' images and the dataset's classes.
# The output width comes from n_CLASS: the previous hard-coded 11 did not match
# the 5 one-hot columns produced by the 'categorical' generators.
model = build_model((IMG_SIZE, IMG_SIZE, 3), n_CLASS, 16, 4)
# Keyword arguments: the positional slot after `loss` is not `metrics` in every
# Keras version.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Short one-epoch training run that checkpoints the best weights seen so far.
filepath = "best_model_1.hdf5"

# 'val_loss' is the quantity compared by save_best_only (spelled out explicitly).
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True)
callbacks_list = [checkpoint]

# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from recent Keras releases.
history = model.fit(
    train_set,
    steps_per_epoch=train_set.n//train_set.batch_size,
    epochs=1,
    validation_data=val_set,
    validation_steps=val_set.n//val_set.batch_size,
    callbacks=callbacks_list)




In [None]:
from keras.models import load_model
import h5py


In [None]:
# Configure training again: Adam optimizer, categorical cross-entropy loss, accuracy metric.
# NOTE(review): this runs after a training run; recompiling presumably resets the
# optimizer state - confirm that is intended.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Render a diagram of the model's layer graph.
keras.utils.plot_model(model)


In [None]:
# Main 20-epoch training run, keeping the weights with the best validation accuracy.
filepath = "EffNetB0_512_8.h5"

# The model is compiled with metrics=['accuracy'], so the validation metric is
# logged as 'val_accuracy'. Monitoring 'val_acc' never matches a logged key and
# the checkpoint would never be written.
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from recent Keras releases.
history = model.fit(
    train_set,
    steps_per_epoch=train_set.n//train_set.batch_size,
    epochs=20,
    validation_data=val_set,
    validation_steps=val_set.n//val_set.batch_size,
    callbacks=callbacks_list)




In [None]:
# Draw the accuracy and loss curves recorded in `history`, one figure each.
curves = history.history
acc, val_acc = curves['accuracy'], curves['val_accuracy']
loss, val_loss = curves['loss'], curves['val_loss']

epochs = range(1, len(acc)+1)

panels = [(acc, val_acc, 'accuracy'), (loss, val_loss, 'loss')]
for position, (train_values, val_values, metric) in enumerate(panels):
    # The first panel uses the implicit current figure; later ones open a new one.
    if position:
        plt.figure()
    plt.plot(epochs, train_values, 'g', label='Training ' + metric)
    plt.plot(epochs, val_values, 'r', label='Validation ' + metric)
    plt.title('Training and validation ' + metric)
    plt.legend()

plt.show()


In [None]:
# Folder whose files are listed and opened by the prediction loop.
TEST_DIR = '../input/cassava-leaf-disease-classification/test_images/'


In [None]:
# Sorted so the prediction order (and the submission row order) is deterministic;
# os.listdir returns entries in arbitrary order.
test_images = sorted(os.listdir(TEST_DIR))

# The generators index classes by class name, so a predicted index is translated
# back to the numeric competition label explicitly rather than assuming that
# the two numberings coincide.
index_to_name = {idx: name for name, idx in train_set.class_indices.items()}
name_to_label = {name: label for label, name in real_labels.items()}

predictions = []
for image in test_images:
    # Context manager releases the file handle; convert('RGB') guarantees the
    # three channels the model input expects.
    with Image.open(TEST_DIR + image) as raw_image:
        img = raw_image.convert('RGB').resize(size)
    # Add the leading batch axis: (H, W, 3) -> (1, H, W, 3).
    batch = np.expand_dims(np.asarray(img), axis=0)
    predicted_index = int(model.predict(batch).argmax(axis = 1)[0])
    predictions.append(name_to_label[index_to_name[predicted_index]])


In [None]:
# Display the list of predicted labels collected by the prediction loop.
predictions


In [None]:

# Assemble the submission table (one row per test image), show it, and save it.
submission_columns = {'image_id': test_images, 'label': predictions}
sub = pd.DataFrame(submission_columns)
display(sub)
sub.to_csv('submission.csv', index = False)
