# Classification of Cassava Leaves

## K-Nearest Neighbors

### Imports

In [1]:
import os  # for path operations
import pickle  # for loading created data
import time  # calculating durations

import numpy as np  # for the matrix operations
from sklearn.neighbors import KNeighborsClassifier  # KNN classifier model

### Definition of parameters

In [2]:
# Hyper-parameter grid for the KNN classifier
weights = ['uniform', 'distance']  # neighbor weighting schemes to try
neighbors = [27, 57]  # candidate values of K (number of neighbors)

# Input geometry: 200x200 pixel images with 3 color channels (RGB)
IMG_W = 200
IMG_H = 200
CHAN = 3

# Edit the two paths below before running.
# input_data_path must be the output_data_path of the aug_image_editing script.
input_data_path = ""
output_data_path = ""

# Dataset folders are named after the image geometry, e.g. "aug_200x200x3"
_dataset_dir = f"aug_{IMG_W}x{IMG_H}x{CHAN}"
train_data_path = os.path.join(input_data_path, f"train_data/{_dataset_dir}")
test_data_path = os.path.join(input_data_path, f"test_data/{_dataset_dir}")

### Loading input data

In [3]:
# Read the pickled training images, then flatten every image into one feature row
train_images_file = os.path.join(train_data_path, "train.pickle")
with open(train_images_file, "rb") as images_fh:
    X_train_raw = pickle.load(images_fh)
X_train = X_train_raw.reshape([-1, IMG_W * IMG_H * CHAN])  # IMG_W*IMG_H*CHAN values per row

# Read the pickled training labels stored next to the images
train_labels_file = os.path.join(train_data_path, "label.pickle")
with open(train_labels_file, "rb") as labels_fh:
    y_train = pickle.load(labels_fh)

print(f"train data: {X_train.shape}, {y_train.shape}")

FileNotFoundError: [Errno 2] No such file or directory: 'train_data/aug_200x200x3\\train.pickle'

### Loading test data

In [None]:
# Read the pickled test images, then flatten every image into one feature row
test_images_file = os.path.join(test_data_path, "test.pickle")
with open(test_images_file, "rb") as images_fh:
    X_test_raw = pickle.load(images_fh)
X_test = X_test_raw.reshape([-1, IMG_W * IMG_H * CHAN])  # IMG_W*IMG_H*CHAN values per row

# Read the pickled test labels stored next to the images
test_labels_file = os.path.join(test_data_path, "label.pickle")
with open(test_labels_file, "rb") as labels_fh:
    y_test = pickle.load(labels_fh)

print(f"test data: {X_test.shape}, {y_test.shape}")

### Calculating class weights

In [None]:
# Weight of class c = total number of training samples / number of samples labelled c,
# computed for the class ids 0..4
n_samples = len(y_train)
class_weights = {
    label: n_samples / np.count_nonzero(y_train == label)
    for label in range(5)
}
print(class_weights)

### Iterative model fitting

In [None]:
# Grid search over every (weighting scheme, K) pair. Each configuration is
# fitted, timed and evaluated on the test set, and its result line
# "weight,n,accuracy,train_seconds,test_seconds" is written to its own file
# under output_data_path (the current directory when that path is empty).
if output_data_path:
    # create the result directory up front so a long run cannot fail at write time
    os.makedirs(output_data_path, exist_ok=True)

for weight in weights:
    for n in neighbors:
        model = KNeighborsClassifier(n_neighbors=n, weights=weight, n_jobs=-1)  # classifier

        # Training
        print("Training starts")
        train_t0 = time.perf_counter()  # monotonic clock, suited to measuring durations
        model.fit(X_train, y_train)  # Fitting
        train_t = time.perf_counter() - train_t0

        # Testing
        print("Testing Starts")
        test_t0 = time.perf_counter()
        predictions = np.asarray(model.predict(X_test))
        if predictions.ndim > 1:
            # 2-D (one-hot / indicator) output: the class is the index of the highest entry per row
            results = predictions.argmax(axis=1)
        else:
            # 1-D output already holds class labels; taking argmax of each
            # scalar label would turn every prediction into class 0
            results = predictions
        test_t = time.perf_counter() - test_t0

        acc = np.count_nonzero(results == y_test) / len(y_test)  # fraction of correct predictions
        result_file = os.path.join(
            output_data_path, f"accuracies_knn_{IMG_W}x{IMG_H}x{CHAN}_{weight}_{n}.out")
        with open(result_file, "w") as f:
            f.write(f"{weight},{n},{acc},{train_t},{test_t}\n")  # save accuracy results
        print(f"{weight},{n},{acc},{train_t},{test_t}")