In [None]:
import numpy as np
import pandas as pd 
import tensorflow as tf
from sklearn.utils import class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
sns.set()
import pandas as pd
import os
import json

import math
from tensorflow import keras
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score, confusion_matrix

from tensorflow.keras.applications import EfficientNetB3, Xception, ResNet50V2
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.utils import plot_model

from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array

In [None]:
image = tf.keras.preprocessing.image.load_img(r'../input/cassava-leaf-disease-classification/train_images/1000015157.jpg')
image

In [None]:
# Image Shape
plt.imread('../input/cassava-leaf-disease-classification/train_images/1000015157.jpg').shape

In [None]:
# Root directory of the competition data set
path = '../input/cassava-leaf-disease-classification'

In [None]:
# Count the files in the training image folder
train_images = os.listdir(os.path.join(path, "train_images"))
print("Total images for Train: ", len(train_images))

In [None]:
# Load the mapping from numeric label (stored as a string key) to disease name
with open('../input/cassava-leaf-disease-classification/label_num_to_disease_map.json') as file:
    classes = json.load(file)

# Pretty-print the mapping
print(json.dumps(classes, indent=4))

In [None]:
# Load the data
df_train = pd.read_csv(os.path.join(path, "train.csv"))
df_train.head()

In [None]:
# Including the class names in the data
df_train['class'] = df_train['label'].map({int(i) : c for i, c in classes.items()}) 
df_train.head()

In [None]:
# Bar chart of the number of training images per class, with the count
# written above each bar.
fig, ax = plt.subplots(figsize=(12, 8))
sns.countplot(x='class', data=df_train, ax=ax)

for p in ax.patches:
    # Bar heights may be floats; show whole counts, centred on the bar
    # instead of at a fixed x-offset.
    ax.annotate(f'{int(p.get_height())}',
                (p.get_x() + p.get_width() / 2, p.get_height()),
                ha='center', va='bottom')
plt.xticks(rotation=90)
ax.set_title("quantities by classes", fontdict={'fontsize': 15})
plt.show()

## We can see that there is an imbalance between classes. More than half of the data are from the Cassava Mosaic Disease Class (CMD).

<h2 style='background:#4974a5; border:50; color:white'><center>Function to Plot Images and Predictions</center></h2>


In [None]:
def plot(images, labels, predictions = None):
    """Show training images in a grid of up to four columns.

    Parameters
    ----------
    images : sequence of str
        Image file names, resolved relative to ``<path>/train_images``
        (``path`` is the module-level data directory).
    labels : sequence
        True class of each image, written in the subplot title.
    predictions : sequence, optional
        Predicted class of each image; when given it is written below the
        image as the x-axis label.

    Returns
    -------
    None
        The figure is created as a side effect; nothing is drawn when
        ``images`` is empty.
    """
    n_images = len(images)
    if n_images == 0:
        # Avoid a zero-column grid and the division by zero that follows.
        return

    n_cols = min(4, n_images)
    n_rows = math.ceil(n_images / n_cols)
    # squeeze=False always returns a 2-D array of axes, so ``axes.flat`` also
    # works when there is a single image (otherwise a bare Axes is returned).
    fig, axes = plt.subplots(n_rows, n_cols, figsize = (21,16), squeeze = False)

    if predictions is None:
        predictions = [None] * len(labels)

    drawn = 0
    for ax, x, y_true, y_pred in zip(axes.flat, images, labels, predictions):
        a = plt.imread(os.path.join(path,"train_images", x ))
        ax.imshow(a)

        ax.set_title(f"Class: {y_true}")

        if y_pred is not None:
            ax.set_xlabel(f"Pred: {y_pred}", color='blue', fontweight='bold')

        ax.set_xticks([])
        ax.set_yticks([])
        drawn += 1

    # Hide grid cells that received no image (e.g. an incomplete last row).
    for ax in axes.ravel()[drawn:]:
        ax.axis('off')

<h1 style='background:#4974a5; border:50; color:white'><center>Plotting samples of each class</center></h1>

<h2 style='background:#4974a5; border:50; color:white'><center>Class 0 - Cassava Bacterial Blight (CBB) - Total: 1087</center></h2>

Symptoms
Small, angular, brown, water-soaked lesions between leaf veins on lower surfaces of leaves; leaf blades turning brown as lesion expands; lesions may have a yellow halo; lesions coalesce to form large necrotic patches; defoliation occurs with leaf petioles remaining in horizontal position as leaves drop; dieback of shoots; brown gum may be present on stems, leaves and petioles

Cause
Bacterium

Comments
Most important bacterial disease of cassava; spread by water splash and infected tools; disease more severe in wet conditions; particularly destructive in South America and Africa; most important method of spread is probably through exchange of infected plant cuttings

Management
Rotate cassava crop with non-host; plow crop debris into soil after harvest or remove and burn it; prune infected parts from plant; propagate cuttings only from healthy plants; intercrop cassava with corn (maize) and melon

In [None]:
# check the class
df_0 = df_train[df_train['label']==0]
df_0 = df_0.sample(12)
df_0_id = df_0['image_id'].values
df_0_class = df_0['class'].values

In [None]:
plot(df_0_id, df_0_class)

<h2 style='background:#4974a5; border:50; color:white'><center>Class 1 - Cassava Brown Streak Disease (CBSD) - Total: 2189</center></h2>


Symptoms
Leaves: 

- chlorotic or necrotic vein banding in mature leaves which may merge later to form large yellow patches

Stems:

- Brown elongated necrotic lesions on young stems

Tubers:

- necrosis of tubers

- roots develop knots

- internal tissues of roots and tubers stained brown and may rot due to secondary fungus infection

1. yellowing along veins on lower/older leaves ~ 3 months after planting
2. dark brown spots on upper green portion of stem ~ 6 months after planting
3. Severe cases- leaf drying, shoot die-back
4. In Tuber - Brown and hard rot when you cut into it. Causes malformation and root
constriction ~ 10 months after planting

CBSD is suspected to have arisen from viruses that were already present in the indigenous African flora. Virus structure and properties: microscopic studies revealed that the virus is 650 nm long, and it was earlier believed to be a carlavirus.
Disease diagnosis: The first and most important step is to identify the disease correctly. The symptoms of cassava brown streak disease vary, which makes it difficult to identify in the field. Identification is further complicated when cassava brown streak and cassava mosaic diseases occur together. A few techniques, such as serological and molecular methods, are used to identify the virus in the laboratory, but they have their limitations.

<img src="https://s3.amazonaws.com/plantvillage-production-new/images/pics/000/001/095/original/Cassava_brown_streak_2.jpg?1375898926" alt="Cassava brown streak" width="300" height="300" style="float:left">

In [None]:
# check the class
df_1 = df_train[df_train['label']==1]
df_1 = df_1.sample(12)
df_1_id = df_1['image_id'].values
df_1_class = df_1['class'].values

In [None]:
plot(df_1_id, df_1_class)

<h2 style='background:#4974a5; border:50; color:white'><center>Class 2 - Cassava Green Mottle (CGM) - Total: 2386</center></h2>


Scientific Name
Cassava green mottle nepovirus. It has not been confirmed to be a nepovirus; these are viruses that are transmitted by nematodes - hence the name.

Distribution
Narrow. Only known from Solomon Islands. It was first found on Choiseul in the 1970s; more recently (2010), similar symptoms were seen on Malaita.

Hosts
It is only known from cassava, and only from Choiseul (and possibly Malaita), nowhere else in the world. In the lab, several plant species have been infected with this virus; these species are known as "indicator" plants and are used to identify and characterise many kinds of viruses

Symptoms & Life Cycle
Young leaves are puckered with faint to distinct yellow spots (Photo 1), green patterns (mosaics), and twisted margins (Photo 2). Usually, the shoots recover from symptoms and appear healthy. Occasionally, plants become severely stunted, edible roots are absent or, if present, they are small and woody when cooked.

In the lab, the virus can be passed between plants in sap, and also in seed. Thirty percent of the seed of infected tobacco plants was infected. Whether it also spreads in seed of cassava is unknown. Seed is not used for growing cassava, so spread on Choiseul is most likely in diseased cuttings. However, there are other possibilities.

Impact
Surveys on Choiseul showed that the disease is present in most plantings, but the number of infected plants is low. Cuttings taken from diseased plants are much slower to develop than those from plants without symptoms during the previous 9 months, and assumed to be healthy.

Detection & Inspection
Look for yellow patterns on the leaves, from small dots to irregular patches of yellow and green. Look for leaf margins that are distorted. The plants may be stunted.

In [None]:
# check the class
df_2 = df_train[df_train['label']==2]
df_2 = df_2.sample(12)
df_2_id = df_2['image_id'].values
df_2_class = df_2['class'].values

In [None]:
plot(df_2_id, df_2_class)

<h2 style='background:#4974a5; border:50; color:white'><center>Class 3 - Cassava Mosaic Disease (CMD) - Total: 13158</center></h2>

Symptoms
Discolored pale green, yellow or white mottled leaves which may be distorted with a reduced size; in highly susceptible cassava cultivars plant growth may be stunted, resulting in poor root yield and low quality stem cuttings.

1. Patches of discolouration (chlorosis) in the leaves that vary from yellow to green.
2. The leaves display size variation and are often severely distorted.
3. Leaf blades sometimes fold or shrivel, depending on severity.

Cause
Virus
Disease is spread by infected cuttings and by whiteflies. The leaves are yellow, mottled and distorted. If leaves are yellow all over but are a normal size, or if there are brown leaves, that does not indicate the disease.

Varieties of cassava resistant to the virus are available in many countries. Most traditional varieties of cassava grown in Africa are susceptible to the virus; seek advice from an agricultural extension service on suitable varieties for your region.

<img src="https://s3.amazonaws.com/plantvillage-production-new/images/pics/000/003/289/original/whiteflies2.jpg?1410805974" alt="Whiteflies" width="250" height="250" style="float:left">


In [None]:
# check the class
df_3 = df_train[df_train['label']==3]
df_3 = df_3.sample(12)
df_3_id = df_3['image_id'].values
df_3_class = df_3['class'].values

In [None]:
plot(df_3_id, df_3_class)

<h2 style='background:#4974a5; border:50; color:white'><center>Class 4 - Healthy - Total: 2577</center></h2>

In [None]:
# check the class
df_4 = df_train[df_train['label']==4]
df_4 = df_4.sample(12)
df_4_id = df_4['image_id'].values
df_4_class = df_4['class'].values

In [None]:
plot(df_4_id, df_4_class)

<h1 style='background:#4974a5; border:50; color:white'><center>Augmentation techniques with Keras ImageDataGenerator</center></h1>

## Generate batches of tensor image data with real-time data augmentation

In [None]:
# Change Label from data to String
train = df_train.astype({'label':str})
# Hold out 20% of the images for testing. The classes are heavily imbalanced,
# so the split is stratified on the label to keep the same class proportions
# in both parts.
train, test = train_test_split(train, test_size = .2, random_state=42,
                               stratify = train['label'])

<h1 style='background:#4974a5; border:50; color:white'><center>Model EfficientNetB3</center></h1>

In [None]:
# Creating dataGernerator
train_datagen = ImageDataGenerator(
                    rotation_range = 45,
                    width_shift_range = 0.2,
                    height_shift_range = 0.2,
                    shear_range = 0.2,
                    zoom_range = 0.2,
                    horizontal_flip = True,
                    vertical_flip = True,
                    fill_mode = 'nearest'
)

In [None]:
# Setting the image size for the model
img_size = 300
size = (img_size, img_size)

In [None]:
# Batches of augmented training images with one-hot labels taken from the
# "class" column; files are looked up by "image_id" in the train_images folder.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train,
    directory=f"{path}/train_images",
    x_col="image_id",
    y_col="class",
    class_mode="categorical",
    target_size=size,
    interpolation="nearest",
    batch_size=32,
    shuffle=True,
    seed=42,
)

In [None]:
# Held-out images must not be randomly augmented, otherwise validation
# metrics and predictions are computed on distorted inputs. They therefore
# get their own generator without any transformations.
# shuffle=False keeps the batches in the order of the `test` frame.
test_generator = ImageDataGenerator().flow_from_dataframe(
                    test,
                    directory = path+"/train_images",
                    x_col = "image_id",
                    y_col = "class",
                    target_size = size,
                    class_mode = "categorical",
                    batch_size = 32,
                    shuffle = False,
                    seed = 42,
                    interpolation = "nearest")

In [None]:
# Creating Model
def modelTransf():
    """Build the transfer-learning classifier.

    The network is an EfficientNetB3 backbone with ImageNet weights and no
    top, followed by global average pooling, two 256-unit ReLU dense layers,
    dropout of 0.5 and a 5-unit softmax output. The input resolution comes
    from the module-level ``img_size``.

    Returns
    -------
    An uncompiled Keras ``Sequential`` model.
    """
    return models.Sequential([
        EfficientNetB3(input_shape=(img_size, img_size, 3),
                       include_top=False,
                       weights='imagenet'),
        layers.GlobalAveragePooling2D(),
        layers.Dense(256, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(5, activation='softmax'),
    ])

In [None]:
# Instantiate the network defined by modelTransf()
model = modelTransf()

In [None]:
# Print the layer-by-layer summary of the model
model.summary()

In [None]:
# Adam with a learning rate of 0.001, categorical cross-entropy loss
# (the generators yield one-hot labels) and accuracy as the reported metric.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Stop once val_loss has not improved for 10 epochs and restore the weights
# of the best epoch.
early_stopping = EarlyStopping(monitor = 'val_loss',
                               patience = 10,
                               mode = 'min', 
                               restore_best_weights = True)

# Write the model to disk whenever val_loss reaches a new minimum.
checkpoint = ModelCheckpoint('modelB3.hdf5',
                             monitor = 'val_loss',
                             verbose = 1, mode = 'min',
                             save_best_only = True)

# Multiply the learning rate by 0.2 when val_loss plateaus.
# - min_lr must be below the optimizer's starting rate (0.001); a floor equal
#   to the starting rate would never allow any reduction.
# - patience is shorter than early stopping's so a reduced rate gets a chance
#   to help before training is stopped.
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss',
                              factor = 0.2,
                              patience = 3,
                              min_lr = 1e-6,
                              mode = 'min', 
                              verbose = 1)

In [None]:
step_size_train = train_generator.n // train_generator.batch_size
step_size_test = test_generator.n // test_generator.batch_size

In [None]:
step_size_train, step_size_test

In [None]:
# Train for up to 30 epochs, validating on the held-out generator; the
# callbacks handle early stopping, checkpointing and learning-rate reduction.
history = model.fit(
    train_generator,
    steps_per_epoch=step_size_train,
    epochs=30,
    validation_data=test_generator,
    validation_steps=step_size_test,
    callbacks=[early_stopping, checkpoint, reduce_lr],
)

# Load Pre-trained model

In [None]:
# NOTE: this loads a model file from a separate input folder, not the
# 'modelB3.hdf5' checkpoint written by the training cell above.
model_treined = keras.models.load_model('../input/model-trained/E_best_model.hdf5')

In [None]:
# File names held by the test generator, and how many there are
filenames = test_generator.filenames
len(filenames)

In [None]:
predictions = model_treined.predict_generator(test_generator, steps = len(filenames))

In [None]:
len(predictions)

In [None]:
# Index of the highest-scoring class for every prediction row
predictions2 = [np.argmax(scores) for scores in predictions]

In [None]:
accuracy_score(predictions2, test_generator.classes)

In [None]:
cfm = confusion_matrix(predictions2, test_generator.classes)
cfm

In [None]:
sns.heatmap(cfm, annot=True, fmt="d", cmap='viridis');

In [None]:
pred = pd.DataFrame(predictions2, columns=['class_pred'])
pred['pred'] = pred['class_pred'].map({int(i) : c for i, c in classes.items()})

In [None]:
image_ids_test = test["image_id"].values
labels_test = test["class"].values
pred_result = pred['pred'].values

In [None]:
rand_idxs = np.random.permutation(len(test))[:12]

In [None]:
plot(image_ids_test[rand_idxs], labels_test[rand_idxs], pred_result[rand_idxs])
plt.show()

## Make Submission

In [None]:
# Sample submission shipped with the competition data, displayed for reference
submission_file = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')
submission_file

In [None]:
# Folder holding the images to predict for the submission
path_test = '../input/cassava-leaf-disease-classification/test_images/'

In [None]:
test_img = os.listdir(path_test)
predict = []
for image in test_img:
    img = tf.keras.preprocessing.image.load_img(path_test + image)
    img = img.resize((300, 300))
    img = np.expand_dims(img, axis = 0)
    predict.append(np.argmax(model_treined.predict(img)))

In [None]:
predict

In [None]:
# One row per test image: file name and predicted class index
submission = pd.DataFrame({'image_id': test_img, 'label': predict})
submission

In [None]:
# Write the submission file without the DataFrame index column
submission.to_csv('submission.csv', index = False)

Thanks for reading, please give it an upvote. It is always greatly appreciated!