### 1. Import libraries for image reading and processing

In [2]:
import numpy as np
import cv2 as cv
import os.path
import math
from torchvision import transforms, datasets
from torch import nn, optim

### 2. Presets for training

In [3]:
# Set the path to the current directory
current_dir = os.getcwd()

# Set the path to the training sets
training_sets_angular_leaf_spot = os.path.join(os.getcwd(), "angular_leaf_spot")
training_sets_bean_rust = os.path.join(os.getcwd(), "bean_rust")
training_sets_healthy = os.path.join(os.getcwd(), "healthy")

# Set the path to the predicting sets
predicting_sets = os.path.join(os.getcwd(), "prediction")

2-1 Euclidean distance for distance voting

In [4]:
def pixel_euclidean_distance(img1, img2):
    return np.sqrt(np.sum(np.square(img1 - img2)))

2-2 Define a function to set the samples for training

In [5]:
def training_set_loader(label, images, labels):
    size = (30, 30)
    # Arrays to store images and labels

    # they could be counted by same indices
    images_in_folder = os.listdir(label)
    for filename in images_in_folder:
        img = cv.imread(os.path.join(os.getcwd(), label, filename), cv.IMREAD_GRAYSCALE)

        if img is not None:
            img = cv.resize(img, size)
            images.append(img.flatten())


            labels.append(label)

        else:
            print(f"Error: Image not found: {filename}. Check img path.")


    return np.array(images), np.array(labels)

2-3 KNN Classification

In [6]:
def kNearestNeighbour(test_image, train_images, train_labels, k):
    distances = []

    # Calculate the distance between the test image and every image in the training set
    for img in train_images:
        distances.append(pixel_euclidean_distance(test_image, img))

    # Sort the distances and get the indices of k smallest distances
    k_indices = np.argsort(distances)[:k]

    # Retrieve the labels of the k nearest neighbors
    k_nearest_labels = [train_labels[i] for i in k_indices]

    return k_indices, k_nearest_labels

### 3. Get images 

In [7]:
# Set the path to the testing sets
# Initialize the images and labels containers
images_container, labels_container = [], []

In [8]:
# 3-1 Images - Angular Spotted Leaves
training_set_loader("angular_leaf_spot", images_container, labels_container)

(array([[ 36, 174, 140, ...,  86,  24,  50],
        [105, 107, 106, ...,  36,  37,  69],
        [111, 127, 114, ...,  56,  52, 133],
        ...,
        [218, 238, 114, ...,  66,  83,  23],
        [115, 116, 140, ..., 151, 141, 143],
        [ 83,  93,  82, ..., 100,  96,  89]], dtype=uint8),
 array(['angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf

In [9]:
# 3-2 Images - Rusted beans Leaves
training_set_loader("bean_rust", images_container, labels_container)

(array([[ 36, 174, 140, ...,  86,  24,  50],
        [105, 107, 106, ...,  36,  37,  69],
        [111, 127, 114, ...,  56,  52, 133],
        ...,
        [ 76,  65,  63, ...,  92,  79, 114],
        [119,  72, 105, ...,  39,  30,  31],
        [ 69,  77,  70, ..., 124, 162, 162]], dtype=uint8),
 array(['angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf

In [10]:
# 3-3 Images - Angular Spotted Leaves
training_set_loader("healthy", images_container, labels_container)

Error: Image not found: healthy_train.120.jpg. Check img path.


(array([[ 36, 174, 140, ...,  86,  24,  50],
        [105, 107, 106, ...,  36,  37,  69],
        [111, 127, 114, ...,  56,  52, 133],
        ...,
        [ 80,  73, 129, ...,  85, 139,  87],
        [ 50,  29,  55, ...,  53,  35,  55],
        [ 94,  60, 137, ..., 133, 145, 133]], dtype=uint8),
 array(['angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf_spot',
        'angular_leaf_spot', 'angular_leaf_spot', 'angular_leaf

In [11]:
# 3-4 Images - Cases for prediction
predicting_images_container, predicting_labels_container = [], []

testcase_angular = os.path.join(os.getcwd(), "prediction", "angular_leaf_spot")
testcase_rusted = os.path.join(os.getcwd(), "prediction", "bean_rust")
testcase_healthy = os.path.join(os.getcwd(), "prediction", "healthy")

resize_to = (30, 30)

In [12]:
# 3-4-1 Images - Angular Spotted Leaves

for filename in os.listdir(testcase_angular):
    img = cv.imread(os.path.join(testcase_angular, filename), cv.IMREAD_GRAYSCALE)

    if img is not None:
        img = cv.resize(img, resize_to)
        predicting_images_container.append(img.flatten())

        predicting_labels_container.append("angular_leaf_spot")

    else:
        print(f"Error: Image not found: {filename}. Check img path.")

In [13]:
# 3-4-2 Images - Rusted beans Leaves

for filename in os.listdir(testcase_rusted):
    img = cv.imread(os.path.join(testcase_rusted, filename), cv.IMREAD_GRAYSCALE)

    if img is not None:
        img = cv.resize(img, resize_to)
        predicting_images_container.append(img.flatten())

        predicting_labels_container.append("bean_rust")

    else:
        print(f"Error: Image not found: {filename}. Check img path.")

In [14]:
# 3-4-3 Images - Healthy Leaves

for filename in os.listdir(testcase_healthy):
    img = cv.imread(os.path.join(testcase_healthy, filename), cv.IMREAD_GRAYSCALE)

    if img is not None:
        img = cv.resize(img, resize_to)
        predicting_images_container.append(img.flatten())

        predicting_labels_container.append("healthy")

    else:
        print(f"Error: Image not found: {filename}. Check img path.")

### 4. Normalise Images

### 5. Test a Model

##### 5-1 Single Case Testcase

In [15]:
single_test_image = predicting_images_container[0]
single_test_image_label = predicting_labels_container[0]


# Test a code if it's working
result_indices, result_labels = kNearestNeighbour(
    single_test_image, images_container, labels_container, 5
)

# find frequency of each value
result_indices, counts = np.unique(result_labels, return_counts=True)
# display each value with highest frequency
result_nearest_labels = result_indices[counts == counts.max()]


# Check if the test label is among the labels of the nearest neighbors
if single_test_image_label in result_nearest_labels:
    print(
        f'Test label "{single_test_image_label}" is among the labels of the nearest neighbors. '
    )
else:
    print(
        f'Test label "{single_test_image_label}" is not among the labels of the nearest neighbors. '
        f"The image is actually in {np.array2string(result_nearest_labels)}.\n\n"
    )

# Print the nearest images and their labels
print("Labels of K nearest images:", result_labels)

Test label "angular_leaf_spot" is not among the labels of the nearest neighbors. The image is actually in ['healthy'].


Labels of K nearest images: ['healthy', 'healthy', 'angular_leaf_spot', 'angular_leaf_spot', 'healthy']


##### 5-2 Multiple Cases Test

##### 5-2-1 Define a function

In [16]:
def test_knn_accuracy(test_images, test_labels, train_images, train_labels, k):
    correct_cases = 0
    total_cases = len(test_images)

    for i in range(total_cases):
        single_test_image = test_images[i]
        single_test_image_label = test_labels[i]

        # Get the nearest neighbors
        _, result_labels = kNearestNeighbour(
            single_test_image, train_images, train_labels, k
        )

        # Find the most frequent label among the nearest neighbors
        result_indices, counts = np.unique(result_labels, return_counts=True)
        result_nearest_labels = result_indices[counts == counts.max()]

        # Check if the test label is among the labels of the nearest neighbors
        if single_test_image_label in result_nearest_labels:
            correct_cases += 1

    # Calculate accuracy
    accuracy = correct_cases / total_cases
    return accuracy

In [23]:
# Assuming you have arrays `predicting_images_container` and `predicting_labels_container` for testing
accuracy = test_knn_accuracy(predicting_images_container, predicting_labels_container, images_container, labels_container, 17)
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 51.85%
