# Classification of Sugarcane Diseases based on Images

## Initial Setup

Examining the train data shows that there are six (6) classes in total:

In [1]:
classes = [
    "Banded_Chlorosis",
    "Brown_Rust",
    "Brown_Spot",
    "Viral",
    "Yellow_Leaf",
    "Healthy",
]

To make it easier to handle the image data, a Python class called `Classification` is defined and it will use the OpenCV library.

This class will contain the method `load_images` that can be used to load the images, as well as the method `resize_images` for resizing all the images.

Property methods `image_count` and `image_dimensions` also allow us to analyze the loaded images and determine if further preprocessing is needed.

In [2]:
from pathlib import Path
from pprint import pprint

import cv2
import numpy as np


class Classification:
    name: str
    images: list[np.ndarray]

    def __init__(self, name: str):
        self.name = name
        self.images = list()

    @property
    def image_count(self) -> int:
        return len(self.images)

    @property
    def image_dimensions(self) -> dict[tuple[int, int, int], int]:
        dims = {}
        for img in self.images:
            if dims.get(img.shape) is None:
                dims[img.shape] = 1
                continue
            dims[img.shape] += 1
        return dims

    @property
    def image_dimensions_distribution(self) -> dict[tuple[int, int, int], int]:
        distrib = {}
        for key, value in self.image_dimensions.items():
            distrib[key] = value / self.image_count
        return distrib

    def load_images(self, top_folder: str = "train") -> None:
        """
        Empties self.images  then loads images using opencv imread

        Returns the length of image_array

        Raises exception upon error
        """
        try:
            self.images = list()
            path = Path(f"{top_folder}/{self.name}")
            image_paths = [img_path for img_path in path.iterdir() if img_path.is_file()]
            for path in image_paths:
                self.images.append(cv2.imread(str(path)))

        except Exception as e:
            raise e

    def square_images(self, image_size: int = 512, with_padding: bool = True) -> None:
        """
        Resizes all images in self.image_array to the specified image size.

        In the event that the source image is not a square and with_padding is True,
        padding will be added to the smaller side to ensure aspect ratio of 1.0.

        Returns None
        """
        for index, item in enumerate(self.images):
            height, width, _ = self.images[index].shape
            aspect_ratio = height / width

            if not with_padding or aspect_ratio == 1:
                self.images[index] = cv2.resize(item, (image_size, image_size))
                continue

            padding_color = (0, 0, 0)
            if aspect_ratio > 1:
                width_padding = (height - width) // 2
                image = cv2.copyMakeBorder(
                    item,
                    0,
                    0,
                    width_padding,
                    width_padding,
                    cv2.BORDER_CONSTANT,
                    value=padding_color,
                )

            else:
                height_padding = (width - height) // 2
                image = cv2.copyMakeBorder(
                    item,
                    height_padding,
                    height_padding,
                    0,
                    0,
                    cv2.BORDER_CONSTANT,
                    value=padding_color,
                )

            self.images[index] = cv2.resize(image, (image_size, image_size))

    def normalize_images(self) -> None:
        """
        Normalizes the values to be from 0 to 1 instead of 0 to 255.

        Returns None
        """
        for index, item in enumerate(self.images):
            self.images[index] = item / 255.0

    def __repr__(self) -> str:
        return f"{self.name}\n image count: {self.image_count}\n image dimensions: {self.image_dimensions}\n dimension distribution: {self.image_dimensions_distribution}"

## Data Loading and Preprocessing

First, create the instances for each class.

In [3]:
classifications = [Classification(_class) for _class in classes]

Then, for each instance, let us load the images under the path `./train/<class_name>`.

We will also display the number of images as well as the dimensions of all images.

In [4]:
for x in classifications:
    x.load_images()
    pprint(x)

Banded_Chlorosis
 image count: 424
 image dimensions: {(1024, 768, 3): 404, (576, 768, 3): 20}
 dimension distribution: {(1024, 768, 3): 0.9528301886792453, (576, 768, 3): 0.04716981132075472}
Brown_Rust
 image count: 282
 image dimensions: {(1024, 768, 3): 280, (576, 768, 3): 2}
 dimension distribution: {(1024, 768, 3): 0.9929078014184397, (576, 768, 3): 0.0070921985815602835}
Brown_Spot
 image count: 1550
 image dimensions: {(1024, 768, 3): 1481, (576, 768, 3): 69}
 dimension distribution: {(1024, 768, 3): 0.9554838709677419, (576, 768, 3): 0.044516129032258066}
Viral
 image count: 597
 image dimensions: {(1024, 768, 3): 501, (576, 768, 3): 96}
 dimension distribution: {(1024, 768, 3): 0.8391959798994975, (576, 768, 3): 0.16080402010050251}
Yellow_Leaf
 image count: 1074
 image dimensions: {(1024, 768, 3): 1005, (576, 768, 3): 69}
 dimension distribution: {(1024, 768, 3): 0.9357541899441341, (576, 768, 3): 0.06424581005586592}
Healthy
 image count: 387
 image dimensions: {(1024, 768,

Two important pieces of information can be gleaned from the output.

1. It can be seen that the six (6) different classes have an imbalance in image count.

This is a problem because it may introduce excessive bias in our models.
To avoid this problem, when training the models, we must use proper sampling techniques to ensure equal class distribution.

2. The images are rectangular and the dimensions are not homogenous.

Most images have a size of `1024x768` but there are some whose size are `576x768` instead.
To ensure that our model will be able to process these images later, we can pad the images to make them a square, and then we resize it.

Lastly, let us normalize the data so that the values are from 0 to 1 instead of 0 to 255.

In [5]:
# Resize all images to 512x512
IMAGE_SIZE = 128
for x in classifications:
    x.square_images(IMAGE_SIZE, with_padding=True)
    x.normalize_images()
    pprint(x)

Banded_Chlorosis
 image count: 424
 image dimensions: {(128, 128, 3): 424}
 dimension distribution: {(128, 128, 3): 1.0}
Brown_Rust
 image count: 282
 image dimensions: {(128, 128, 3): 282}
 dimension distribution: {(128, 128, 3): 1.0}
Brown_Spot
 image count: 1550
 image dimensions: {(128, 128, 3): 1550}
 dimension distribution: {(128, 128, 3): 1.0}
Viral
 image count: 597
 image dimensions: {(128, 128, 3): 597}
 dimension distribution: {(128, 128, 3): 1.0}
Yellow_Leaf
 image count: 1074
 image dimensions: {(128, 128, 3): 1074}
 dimension distribution: {(128, 128, 3): 1.0}
Healthy
 image count: 387
 image dimensions: {(128, 128, 3): 387}
 dimension distribution: {(128, 128, 3): 1.0}


With the images resized, we must also transform our categorical features into a numeric array.

This can be done using Scikit-learn's LabelBinarizer class.

In [6]:
from sklearn.preprocessing import LabelBinarizer

binarizer = LabelBinarizer()
class_labels: list[np.array] = binarizer.fit_transform(classes)  # type: ignore
label_pairings: dict[str, np.ndarray] = dict(zip(classes, class_labels, strict=False))
# Print output and classes to verify
print("Binarizer classes:")
pprint(binarizer.classes_)
print()
pprint(label_pairings, sort_dicts=False)

Binarizer classes:
array(['Banded_Chlorosis', 'Brown_Rust', 'Brown_Spot', 'Healthy', 'Viral',
       'Yellow_Leaf'], dtype='<U16')

{'Banded_Chlorosis': array([1, 0, 0, 0, 0, 0]),
 'Brown_Rust': array([0, 1, 0, 0, 0, 0]),
 'Brown_Spot': array([0, 0, 1, 0, 0, 0]),
 'Viral': array([0, 0, 0, 0, 1, 0]),
 'Yellow_Leaf': array([0, 0, 0, 0, 0, 1]),
 'Healthy': array([0, 0, 0, 1, 0, 0])}


With our processed images and our transformed class labels, let use generate the arrays which will serve as our input array `X` and label array `Y`.

This can be accomplished by combining the images of our classes and generating the corresponding label array using `label_pairings`.

In [7]:
# Create our array X, the input array
X: np.ndarray = np.vstack(
    [np.array(class_.images) for class_ in classifications]
)  # convert the image list of each class into an array then combine them

# Create our array Y, the label array
Y: np.ndarray = np.vstack(
    [
        [label_pairings[class_.name] for _ in range(class_.image_count)]
        for class_ in classifications
    ],
)  # generate the list of one-hot encoded vectors for each class then combine them

print(f"X.shape: {X.shape}\nY.shape: {Y.shape}")

X.shape: (4314, 128, 128, 3)
Y.shape: (4314, 6)



However, we must not forget that our classes are imbalanced.

It is important that balance this out to avoid overfitting towards the classes with a much larger count.

To balance our classes, we can used the Imbalanced-Learn library and then undersample the Majority class.

In [8]:
from imblearn.under_sampling import RandomUnderSampler

RANDOM_STATE_INT = 1738  # Seed for consistency
sampler = RandomUnderSampler(random_state=RANDOM_STATE_INT, replacement=False)

X_flattened = X.reshape(X.shape[0], -1)  # Flatten X for compatibility with RandomUnderSampler

X_balanced: np.ndarray  # type annotation
Y_balanced: np.ndarray  # type annotation
X_balanced, Y_balanced = sampler.fit_resample(X_flattened, Y)  # type: ignore

X_balanced = X_balanced.reshape(-1, *X.shape[1:])
print(f"X_balanced.shape: {X_balanced.shape}\nY_balanced.shape: {Y_balanced.shape}")

# Count the number of occurences per class
class_distribution: dict[str, int] = {}  #
for label in Y_balanced:
    if class_distribution.get(f"{label}") is None:
        class_distribution[f"{label}"] = 1
        continue
    class_distribution[f"{label}"] += 1

print("\nClass Distribution:")
pprint(class_distribution, sort_dicts=False)

X_balanced.shape: (1692, 128, 128, 3)
Y_balanced.shape: (1692, 6)

Class Distribution:
{'[1 0 0 0 0 0]': 282,
 '[0 1 0 0 0 0]': 282,
 '[0 0 1 0 0 0]': 282,
 '[0 0 0 1 0 0]': 282,
 '[0 0 0 0 1 0]': 282,
 '[0 0 0 0 0 1]': 282}


`X_balanced` and `Y_balanced` represent our data with balanced samples. Printing the class distribution also shows that each class has 282 entries each. This matches the cardinality of our minority class, `Brown_Rust`, as identified by our previous cells

Lastly, for validation purposes of our model, a train-test split will be conducted on our data. 

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X_balanced, Y_balanced, train_size=0.8, random_state=RANDOM_STATE_INT, shuffle=True
)

print(
    f"Shapes\n X_train: {len(X_train)}\n Y_train: {len(Y_train)}\n X_test: {len(X_test)}\n Y_test: {len(Y_test)}"
)

Shapes
 X_train: 1353
 Y_train: 1353
 X_test: 339
 Y_test: 339


## Method 1: Convolution Neural Network (CNN)

The first method for solving this classification problem is through the use of CNN.

We will be using the Keras and TensorFlow libraries.

First, let us setup the environment and model.

In [10]:
import os

import keras

# Set Keras backend to use TensorFlow
os.environ["KERAS_BACKEND"] = "tensorflow"

cnn_model = keras.models.Sequential(
    [
        keras.layers.InputLayer(shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),
        keras.layers.Conv2D(filters=32, kernel_size=(3, 3), padding="Same", activation="relu"),
        keras.layers.MaxPool2D(pool_size=(2, 2)),
        keras.layers.Dropout(0.25),
        # Convolution Layer 2
        keras.layers.Conv2D(filters=64, kernel_size=(3, 3), padding="Same", activation="relu"),
        keras.layers.MaxPool2D(pool_size=(2, 2)),
        keras.layers.Dropout(0.25),
        # Convolution Layer 3
        keras.layers.Conv2D(filters=128, kernel_size=(3, 3), padding="Same", activation="relu"),
        keras.layers.MaxPool2D(pool_size=(2, 2)),
        keras.layers.Dropout(0.25),
        # keras.layers.Conv2D(
        #     filters=256, kernel_size=(5, 5), padding="Same", activation="relu"
        # ),
        # keras.layers.MaxPool2D(pool_size=(2, 2)),
        # fully connected
        keras.layers.Flatten(),
        keras.layers.Dense(256, activation="relu"),
        # keras.layers.Dense(512, activation="relu"),
        # keras.layers.Dropout(0.5),
        keras.layers.Dense(len(classes), activation="softmax"),
    ]
)

cnn_model.summary()


cnn_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999),  # type: ignore
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()],
)

EPOCHS = 3
BATCH_SIZE = 250

history = cnn_model.fit(
    X_train,
    Y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    # steps_per_epoch=X_train.shape[0] // BATCH_SIZE,
    validation_data=(X_test, Y_test),
)

Epoch 1/3
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - categorical_accuracy: 0.1872 - loss: 3.9624 - val_categorical_accuracy: 0.1652 - val_loss: 1.7933
Epoch 2/3
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1s/step - categorical_accuracy: 0.2383 - loss: 1.8163 - val_categorical_accuracy: 0.3274 - val_loss: 1.7901
Epoch 3/3
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1s/step - categorical_accuracy: 0.2797 - loss: 1.7874 - val_categorical_accuracy: 0.1858 - val_loss: 1.7823


## Method 2: Transfer Learning from InceptionV3

In [15]:
from keras.api.applications.inception_v3 import InceptionV3

EPOCHS = 20
BATCH_SIZE = 250

pre_trained_model = InceptionV3(
    input_shape=X_train.shape[1::],
    weights="imagenet",
    include_top=False,  # the fully connected layer at the end
)

for layer in pre_trained_model.layers:
    layer.trainable = False

fully_connected_layer = keras.layers.Flatten()(pre_trained_model.output)
fully_connected_layer = keras.layers.Dense(512, activation="relu")(fully_connected_layer)
fully_connected_layer = keras.layers.Dropout(0.2)(fully_connected_layer)
fully_connected_layer = keras.layers.Dense(len(classes), activation="softmax")(
    fully_connected_layer
)

transfer_model = keras.Model(pre_trained_model.input, fully_connected_layer)

# transfer_model.summary()

transfer_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999),  # type: ignore
    loss=keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy(), "acc"],
)

history = transfer_model.fit(
    X_train, Y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=(X_test, Y_test)
)

Epoch 1/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 2s/step - acc: 0.2157 - categorical_accuracy: 0.2157 - loss: 14.6207 - val_acc: 0.3097 - val_categorical_accuracy: 0.3097 - val_loss: 3.7671
Epoch 2/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1s/step - acc: 0.3978 - categorical_accuracy: 0.3978 - loss: 3.7166 - val_acc: 0.5251 - val_categorical_accuracy: 0.5251 - val_loss: 1.3227
Epoch 3/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1s/step - acc: 0.5321 - categorical_accuracy: 0.5321 - loss: 1.3546 - val_acc: 0.6136 - val_categorical_accuracy: 0.6136 - val_loss: 1.0132
Epoch 4/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1s/step - acc: 0.6228 - categorical_accuracy: 0.6228 - loss: 0.9709 - val_acc: 0.6519 - val_categorical_accuracy: 0.6519 - val_loss: 0.9768
Epoch 5/20
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1s/step - acc: 0.7173 - categorical_accuracy: 0.7173 - loss: 0.8331 - val

In [16]:
transfer_model.summary()