In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import os
import cv2
from matplotlib import pyplot as plt


In [None]:
def shuffle_data(data, seed):
    """Return the items of ``data`` as a new list in a seeded random order.

    A fresh generator is created from ``seed`` on every call, so two
    sequences of equal length shuffled with the same seed are reordered by
    the same permutation. ``data`` itself is not modified.

    Args:
        data: Indexable sequence supporting ``len``.
        seed: Seed passed to ``np.random.default_rng``.

    Returns:
        A list holding the elements of ``data`` in permuted order.
    """
    order = np.random.default_rng(seed).permutation(len(data))
    shuffled = []
    for position in order:
        shuffled.append(data[position])
    return shuffled
    

def train_test_split_from_directory(data_dir, image_size, seed):
    """Load a directory-per-class image dataset and split it into train/test sets.

    Every sub-directory of ``data_dir`` is treated as one class, and its name
    is used as the label of the images inside it. Each image is read with
    OpenCV, converted from BGR to RGB and resized to ``image_size``. The
    split (``test_size=0.2``) is performed separately for each class, and the
    combined train and test sets are shuffled afterwards.

    Directory listings are sorted because ``os.listdir`` returns entries in
    arbitrary order; without sorting, the same ``seed`` could produce
    different splits on different machines. Entries of ``data_dir`` that are
    not directories, and files OpenCV cannot decode, are skipped with a
    printed notice instead of aborting the whole load.

    Args:
        data_dir: Path of the dataset root containing one sub-directory per class.
        image_size: Target size handed to ``cv2.resize``.
        seed: Seed used for both the per-class split and the final shuffle.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` of lists. The ``X``
        lists hold the resized RGB image arrays and the ``y`` lists hold the
        matching class-directory names, index-aligned with the images.
    """
    X_train, X_test, y_train, y_test = [], [], [], []

    for subdir in sorted(os.listdir(data_dir)):
        subdir_path = os.path.join(data_dir, subdir)
        if not os.path.isdir(subdir_path):
            # Stray files next to the class folders are not classes.
            print('Skipping non-directory entry:', subdir_path)
            continue

        X_subdir, y_subdir = [], []

        for filename in sorted(os.listdir(subdir_path)):
            file_path = os.path.join(subdir_path, filename)
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread returns None (no exception) for files it cannot decode.
                print('Skipping unreadable image:', file_path)
                continue
            img = cv2.resize(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), image_size)
            X_subdir.append(img)
            y_subdir.append(subdir)

        X_train_subdir, X_test_subdir, y_train_subdir, y_test_subdir = train_test_split(
            X_subdir, y_subdir, test_size=0.2, random_state=seed
        )

        X_train.extend(X_train_subdir)
        X_test.extend(X_test_subdir)
        y_train.extend(y_train_subdir)
        y_test.extend(y_test_subdir)

    # Images and labels are shuffled in separate calls. They stay aligned
    # because each pair has the same length and uses the same seed, which
    # yields the same permutation in shuffle_data.
    X_train = shuffle_data(X_train, seed)
    X_test = shuffle_data(X_test, seed)
    y_train = shuffle_data(y_train, seed)
    y_test = shuffle_data(y_test, seed)

    return X_train, X_test, y_train, y_test

In [None]:
def create_knn(k):
    """Build an unfitted ``KNeighborsClassifier`` with ``n_neighbors`` set to ``k``."""
    classifier = KNeighborsClassifier(n_neighbors=k)
    return classifier


def fit_knn(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training images and print train/test accuracy.

    Each sample is flattened into a single feature row before being passed
    to the model. Nothing is returned; the model is fitted in place and the
    two accuracy scores are printed.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        X_train: Training samples.
        X_test: Test samples.
        y_train: Labels for ``X_train``.
        y_test: Labels for ``X_test``.
    """
    def as_rows(samples):
        # One row per sample; all remaining axes are collapsed into features.
        return np.reshape(samples, (len(samples), -1))

    model.fit(as_rows(X_train), y_train)

    train_predictions = model.predict(as_rows(X_train))
    test_predictions = model.predict(as_rows(X_test))

    print('Model Accuracy Stats:')
    train_accuracy = accuracy_score(y_train, train_predictions)
    print('Train Data Accuracy:', train_accuracy)
    test_accuracy = accuracy_score(y_test, test_predictions)
    print('Test Data Accuracy:', test_accuracy)


def predict(model, class_map, images):
    """Display each image and print the class name the model predicts for it.

    Args:
        model: Fitted estimator exposing ``predict``.
        class_map: Mapping from class name to the numeric label the model
            was trained on; it is inverted to turn predictions into names.
        images: Iterable of image arrays; each is flattened to one row
            before prediction.
    """
    name_by_label = dict((label, name) for name, label in class_map.items())
    for number, image in enumerate(images, start=1):
        print('Image', number)
        plt.imshow(image)
        plt.axis('off')
        plt.show()
        # The model expects a 2-D batch, so the single image becomes one row.
        predicted = model.predict(np.reshape(image, (1, -1)))
        print('Image', number, 'Prediction:', name_by_label[predicted[0]])

In [None]:
# Reproducibility and model settings.
seed = 42  # drives the per-class train/test split and the final shuffle
k = 20  # number of neighbours handed to create_knn

# Dataset settings.
data_dir = 'pizza types'  # dataset root; each sub-directory is one class
image_size = (256, 256)  # size every image is resized to via cv2.resize

In [None]:
# Load the images from disk, split per class into train and test sets.
X_train, X_test, y_train, y_test = train_test_split_from_directory(data_dir, image_size, seed)

# Normalizing
X_train = [pixels / 255. for pixels in X_train]
X_test = [pixels / 255. for pixels in X_test]

# Converting y values to numbers
class_map = {
    'Black Olive': 0,
    'Cheese': 1,
    'Hawaiian': 2,
    'Pepperoni': 3,
    'Taco': 4,
}

y_train = list(map(class_map.__getitem__, y_train))
y_test = list(map(class_map.__getitem__, y_test))

# Build the classifier, fit it and report train/test accuracy.
model = create_knn(k)
fit_knn(model, X_train, X_test, y_train, y_test)

In [None]:
# Load the extra test pictures with the same preprocessing as the training
# data: BGR -> RGB, resized to `image_size`, pixel values divided by 255.
test_dir = 'test pizzas'

images = []
# Sorted because os.listdir order is arbitrary; this keeps the "Image N"
# numbering stable between runs and machines.
for filename in sorted(os.listdir(test_dir)):
    file_path = os.path.join(test_dir, filename)
    raw_image = cv2.imread(file_path)
    if raw_image is None:
        # cv2.imread returns None (no exception) for files it cannot decode,
        # e.g. hidden OS metadata files; skip them instead of crashing later.
        print('Skipping unreadable image:', file_path)
        continue
    image = cv2.resize(cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB), image_size) / 255.
    images.append(image)

predict(model, class_map, images)