# KNN Research

In [4]:
import glob
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np

def load_images(source_dir):
    input_images = []
    output_labels = []

    channels = 1

    for image_path in glob.glob(source_dir + "/**/*.jpg", recursive=True):
        image_relative_path = image_path.replace(source_dir, '')
        relative_path_array = image_relative_path.split(os.path.sep)
        output_labels.append(relative_path_array[1])

        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        resized = cv2.resize(img, (64, 64))
        resized = resized.reshape(64, 64, channels)
        input_images.append(resized/255)

    return input_images, output_labels

In [5]:
print('Start loading process')

source_dir = "/kaggle/input/asl-dataset-research/asl/asl_alphabet_train/asl_alphabet_train"
input_images, output_labels = load_images(source_dir)

print("End loading process")

Start loading process
End loading process


In [6]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

label_encoder = LabelEncoder()
label_encoder.fit(output_labels)

labels_amount = len(label_encoder.classes_)

onehot_encoder = OneHotEncoder(sparse_output=False)

categorical_column = label_encoder.transform(output_labels)
integer_encoded = categorical_column.reshape(len(categorical_column),1)

onehot_encoder.fit(integer_encoded)
output_labels_values = onehot_encoder.transform(integer_encoded)

In [7]:
from sklearn.model_selection import train_test_split

images_train, images_test, labels_train, labels_test = train_test_split(input_images, output_labels_values, test_size=0.3, random_state=42)

print('Training set size : ', len(images_train))
print('Testing set size : ', len(images_test))
print('Labels set size : ', len(labels_train))
print('Labels testing set size : ', len(labels_test))

Training set size :  60900
Testing set size :  26100
Labels set size :  60900
Labels testing set size :  26100


In [8]:
import numpy as np
images_train_flat = np.array([image.flatten() for image in images_train])
images_test_flat = np.array([image.flatten() for image in images_test])
labels_train_flat = np.argmax(np.array(labels_train), axis=1)
labels_test_flat = np.argmax(np.array(labels_test), axis=1)

In [9]:
import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
from sklearn.metrics import classification_report

knn_model = KNeighborsClassifier(n_neighbors = 20)
knn_fitted_model = knn_model.fit(images_train_flat, labels_train_flat)
knn_predictions = knn_fitted_model.predict(images_test_flat)

knn_conf_matrix = confusion_matrix(labels_test_flat, knn_predictions)
knn_accuracy = accuracy_score(labels_test_flat, knn_predictions)
knn_classification = classification_report(labels_test_flat, knn_predictions)

In [10]:
print("KNN Accuracy : ", knn_accuracy)
print()
print(knn_classification)
print("KNN Confusion Matrix : \n", knn_conf_matrix)

KNN Accuracy :  0.8780842911877395

              precision    recall  f1-score   support

           0       0.84      0.87      0.85       920
           1       0.87      0.90      0.88       866
           2       0.98      0.91      0.95       951
           3       0.89      0.90      0.90       921
           4       0.77      0.83      0.80       898
           5       0.96      0.79      0.87       878
           6       0.94      0.89      0.92       853
           7       0.90      0.94      0.92       895
           8       0.84      0.93      0.88       883
           9       0.90      0.92      0.91       873
          10       0.90      0.88      0.89       972
          11       0.94      0.92      0.93       847
          12       0.89      0.87      0.88       900
          13       0.94      0.94      0.94       904
          14       0.93      0.81      0.86       886
          15       0.96      0.94      0.95       925
          16       0.97      0.93      0.95  