## Corn Disease Classification

The aim of this project is to build models (specifically neural networks) that are able to classify images of corn leaves based on a specific disease.

The different diseases are:
- **Blight**: foliar disease of corn (maize) caused by a parasite. With its characteristic cigar-shaped lesions, this disease can cause significant yield loss in susceptible corn hybrids
- **Common Rust**: caused by a fungus and occurs every growing season. It is seldom a concern in hybrid corn. Early symptoms of common rust are chlorotic flecks on the leaf surface
- **Gray Leaf Spot**: it is a foliar fungal disease that affects maize. GLS is considered one of the most significant yield-limiting diseases of corn worldwide. There are two fungal pathogens that cause GLS. Symptoms seen on corn include leaf lesions, discoloration (chlorosis), and foliar blight

After briefly discussing some technical aspects of the biology, let's jump into something more interesting for us.

We started with a folder, divided into subfolders, containing the different leaf images divided according to the disease. The first step was to build an actual dataset.

## Data Loading

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageEnhance
from PIL.ImageOps import crop, flip, mirror
import os
import seaborn as sns
import warnings
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
warnings.filterwarnings('ignore')
plt.style.use('ggplot')

In [None]:
RANDOM_STATE = 42 # setting a constant random state for every method that uses randomization

In [None]:
def create_data(root_dir='Corn Images',
                class_names=('Blight', 'Common_Rust', 'Gray_Leaf_Spot', 'Healthy')):
    """Load every image stored under ``root_dir/<class_name>`` with its label.

    Parameters
    ----------
    root_dir : str
        Folder that contains one sub-folder per class.
    class_names : sequence of str
        Names of the sub-folders to read; each name is also used as the label.

    Returns
    -------
    tuple of (list, list)
        The loaded PIL images and, at the same positions, their labels.
    """
    final_images = []
    final_labels = []

    for disease in class_names:
        folder_path = os.path.join(root_dir, disease)

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # dataset order (and therefore the seeded split) reproducible.
        for filename in sorted(os.listdir(folder_path)):
            img_path = os.path.join(folder_path, filename)
            if not os.path.isfile(img_path):
                continue  # nested folders are not images

            # Image.open is lazy: load the pixel data now and let the context
            # manager close the file, so no file handle stays open per image.
            with Image.open(img_path) as img:
                img.load()

            # Label appended together with the image so both lists stay aligned.
            final_images.append(img)
            final_labels.append(disease)

    return final_images, final_labels

In [None]:
# Build the dataset: one row per image, holding the image object and its class label
result = create_data()
dataset = pd.DataFrame(dict(zip(['Image', 'Label'], result)))

In [None]:
# Sanity check: print the per-class row counts to confirm that every folder has been loaded
for class_name in ('Blight', 'Common_Rust', 'Gray_Leaf_Spot', 'Healthy'):
    print(dataset[dataset['Label'] == class_name].count())

In [None]:
# preview the first ten rows of the dataset
dataset.head(10)

## Data Visualization

In [None]:
def show_image(image):
    """Display a copy of ``image`` with matplotlib, keeping an equal aspect ratio."""
    plt.imshow(image.copy(), aspect='equal')

In [None]:
# Show a random sample of images in a grid, each one titled with its label
rng = np.random.default_rng(RANDOM_STATE)
n_cols = 3
n_rows = 2
# replace=False: without it the same image could be drawn more than once
n_samples = min(n_cols * n_rows, len(dataset))
indexes = rng.choice(len(dataset), n_samples, replace=False)

plt.figure(figsize=(16, 9))

# `idx` instead of `id`, so the builtin is not shadowed
for ii, idx in enumerate(indexes, 1):
    plt.subplot(n_rows, n_cols, ii)
    image = dataset['Image'][int(idx)]
    show_image(image)
    plt.title(dataset['Label'][int(idx)])
    plt.axis('off')

In [None]:
# Bar chart of how many images belong to each class
palette = sns.color_palette('hls', 4)

plt.figure(figsize=(16, 9))
sns.histplot(dataset['Label'], bins=4, shrink=0.6, kde=False, color=palette[2])

plt.xlabel('Category')
plt.ylabel('Frequency')
plt.title('Histogram of the Labels')

# Show the plot
plt.show()

In [None]:
# NOTE(review): selecting columns with a list gives a separate DataFrame object, but the PIL
# image objects inside it are presumably still shared with `dataset` - confirm before
# modifying any image in place.
x = dataset[['Image', 'Label']] # using another variable to leave the original dataset intact

## Data Preparation

In [None]:
# Shuffled 80/20 split, stratified on the label column
X_train, X_test, y_train, y_test = train_test_split(
    x['Image'],
    x['Label'],
    test_size=0.2,
    random_state=RANDOM_STATE,
    shuffle=True,
    stratify=x['Label'],
)

In [None]:
# Report the size of each of the four splits
for caption, split in (
    ('X_train shape:', X_train),
    ('X_test shape:', X_test),
    ('y_train shape:', y_train),
    ('y_test shape:', y_test),
):
    print(caption, split.shape)

In [None]:
# check the stratification: the class proportions should be (almost) identical in the two
# splits. A plain `.count()` would only print the total number of rows.
print(y_train.value_counts(normalize=True).sort_index())
print(y_test.value_counts(normalize=True).sort_index())

In [None]:
enc = OneHotEncoder()

# The encoder needs a 2-D (n_samples, 1) input. Multi-dimensional indexing of a Series
# (`series[:, np.newaxis]`) is no longer supported by recent pandas versions, so the labels
# are converted to a NumPy array first and reshaped there.
Y_train = enc.fit_transform(y_train.to_numpy().reshape(-1, 1)).toarray()
Y_test = enc.transform(y_test.to_numpy().reshape(-1, 1)).toarray()

## Data Augmentation

In [None]:
def Resize(content, size=(224, 224)):
    """Return ``content`` resized to ``size``.

    Parameters
    ----------
    content : object exposing ``resize(size)`` (e.g. a PIL image)
    size : tuple of (int, int)
        Target size passed straight to ``content.resize``; defaults to 224x224.
    """
    return content.resize(size)

In [None]:
def RandomRotation(content, p=0.2, max_angle=45):
    """Rotate ``content`` by a random whole angle with probability ``p``.

    Parameters
    ----------
    content : object exposing ``rotate(angle)`` (e.g. a PIL image)
    p : float
        Probability of applying the rotation.
    max_angle : int
        The angle is drawn uniformly from ``-max_angle .. max_angle`` (both included).

    Returns
    -------
    The rotated image, or ``content`` itself when the rotation is not applied.
    """
    if np.random.random() >= p:
        return content
    # randint excludes its upper bound: + 1 keeps the range symmetric around zero
    angle = int(np.random.randint(-max_angle, max_angle + 1))
    return content.rotate(angle)

In [None]:
def RandomHorizontalFlip(content, p=0.2):
    """Mirror ``content`` left-to-right with probability ``p``.

    Returns the result of ``mirror(content)``, or ``content`` itself when the
    flip is not applied.
    """
    if np.random.random() >= p:
        return content
    return mirror(content)

In [None]:
def RandomVerticalFlip(content, p=0.2):
    """Flip ``content`` top-to-bottom with probability ``p``.

    Returns the result of ``flip(content)``, or ``content`` itself when the
    flip is not applied.
    """
    if np.random.random() >= p:
        return content
    return flip(content)

In [None]:
def RandomZoom(content, p=0.2, max_border=50):
    """Zoom into ``content`` with probability ``p``.

    A border of ``0 .. max_border - 1`` pixels is cropped from every side and
    the result is resized back to the size of the input.

    Parameters
    ----------
    content : PIL image
    p : float
        Probability of applying the zoom.
    max_border : int
        Exclusive upper bound of the cropped border, in pixels.

    Returns
    -------
    The zoomed image, or ``content`` itself when the zoom is not applied.
    """
    if np.random.random() >= p:
        return content
    border = np.random.randint(0, max_border)
    # Resize to the input's own size (not a fixed 224x224) so that the
    # transform never changes the image dimensions.
    return crop(content, border).resize(content.size)

In [None]:
def AdjustContrast(content, p=0.2, factor_range=(0.5, 1.5)):
    """Change the contrast of ``content`` with probability ``p``.

    Parameters
    ----------
    content : PIL image
    p : float
        Probability of applying the adjustment.
    factor_range : tuple of (float, float)
        Bounds of the uniform draw for the factor given to
        ``ImageEnhance.Contrast(...).enhance``.

    Returns
    -------
    The adjusted image, or ``content`` itself when nothing is applied.
    """
    if np.random.random() >= p:
        return content
    factor = np.random.uniform(*factor_range)
    return ImageEnhance.Contrast(content).enhance(factor)

In [None]:
def AdjustBrightness(content, p=0.2, factor_range=(0.5, 1.5)):
    """Change the brightness of ``content`` with probability ``p``.

    Parameters
    ----------
    content : PIL image
    p : float
        Probability of applying the adjustment.
    factor_range : tuple of (float, float)
        Bounds of the uniform draw for the factor given to
        ``ImageEnhance.Brightness(...).enhance``.

    Returns
    -------
    The adjusted image, or ``content`` itself when nothing is applied.
    """
    if np.random.random() >= p:
        return content
    factor = np.random.uniform(*factor_range)
    return ImageEnhance.Brightness(content).enhance(factor)

In [None]:
def DataAugmentation(content):
    """Run ``content`` through every random transformation, in a fixed order."""
    pipeline = (
        RandomRotation,
        RandomHorizontalFlip,
        RandomVerticalFlip,
        RandomZoom,
        AdjustContrast,
        AdjustBrightness,
    )
    augmented = content
    for transform in pipeline:
        augmented = transform(augmented)
    return augmented

In [None]:
# Bring every image of both splits to the common size
X_train, X_test = (split.apply(Resize) for split in (X_train, X_test))

In [None]:
# The augmentation helpers draw from NumPy's global generator: seed it so that the
# augmented training set is the same on every run. Only the training images are augmented.
np.random.seed(RANDOM_STATE)
X_train = X_train.apply(DataAugmentation)

In [None]:
# CHECK IF IT WORKS: original images (top row) against their processed versions (bottom row)
rng = np.random.default_rng(RANDOM_STATE)
n_cols = 3
n_rows = 2
# Sample index *labels* of X_train: after the shuffled split its index is a subset of the
# original labels, so positions 0..len(X_train)-1 are not guaranteed to exist in it.
indexes = rng.choice(X_train.index.to_numpy(), n_cols, replace=False)

fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 9))

for col, idx in enumerate(indexes):
    # top row: the image as it was loaded
    axes[0, col].imshow(x['Image'].loc[idx], aspect='equal')
    axes[0, col].set_title('Original')
    axes[0, col].axis('off')

    # bottom row: the same image as stored in X_train
    axes[1, col].imshow(X_train.loc[idx], aspect='equal')
    axes[1, col].set_title('Augmented')
    axes[1, col].axis('off')

plt.show()

In [None]:
def ToNumpy(content):
    """Return ``content`` as a NumPy array of 32-bit floats."""
    pixels = np.asarray(content, dtype='float32')
    return pixels

In [None]:
# Turn every image of both splits into a float array
X_train, X_test = (split.apply(ToNumpy) for split in (X_train, X_test))

In [None]:
def Rescaling(content):
    """Divide ``content`` by 255.0 and return the result."""
    divisor = 255.0
    return content / divisor

In [None]:
# Apply the same rescaling to the training and the test images
X_train, X_test = (split.apply(Rescaling) for split in (X_train, X_test))

In [None]:
# `Series.values` would give a 1-D object array holding one array per image; stacking
# builds a single numeric array with the sample axis first (all images must share the
# same shape - presumably (224, 224, 3), to be confirmed if the folders hold non-RGB files).
X_train = np.stack(X_train.to_numpy())
X_test = np.stack(X_test.to_numpy())
# The one-hot targets come from `.toarray()` and are already NumPy arrays, which have no
# `.values` attribute; np.asarray leaves them unchanged.
Y_test = np.asarray(Y_test)
Y_train = np.asarray(Y_train)

## Data Modelling

In [None]:
import sklearn
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras import Input

from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import plot_model

In [None]:
# shape handed to the model's Input layer
# NOTE(review): presumably (height, width, channels) of the resized RGB images - confirm
input_shape = (224, 224, 3)

In [None]:
# CNN: four Conv2D + MaxPooling2D stages, dropout, then a 4-way softmax classifier
classifier = Sequential()

classifier.add(Input(shape=input_shape))

# The Input layer above already fixes the input shape, so the convolutions do not need an
# `input_shape` argument. Layer names use only letters, digits and underscores
# (parentheses and commas may be rejected by TensorFlow as scope names).
for stage, filters in enumerate((16, 32, 64, 128), 1):
    classifier.add(Conv2D(filters, (3, 3), activation='relu', kernel_initializer='random_normal', strides=(1, 1), name=f'Conv2D_{filters}'))
    classifier.add(MaxPooling2D(pool_size=(2, 2), name=f'MaxPooling2D_{stage}'))

# `name` belongs to the layer constructor: Sequential.add() takes no `name` argument
classifier.add(Dropout(0.5, name='Dropout'))

classifier.add(Flatten(name='Flatten'))

classifier.add(Dense(units=4, activation='softmax', kernel_initializer='random_normal', name='Dense_SoftMax'))

classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# print the model summary
classifier.summary()

In [None]:
# Save a diagram of the network (with layer names and shapes) to CNN_ours.png
plot_model(
    classifier,
    to_file='CNN_ours.png',
    show_shapes=True,
    show_layer_names=True,
)

In [None]:
# Train for 10 epochs in batches of 32; the returned object is kept in `history`
history = classifier.fit(
    X_train,
    Y_train,
    batch_size=32,
    epochs=10,
    verbose=1,
)