In [6]:
import os
import cv2
import numpy as np
import random

In [7]:
IMAGES_PATH = os.path.join('dataset')

In [8]:
dataset = []
trainset = []
labels = []

In [9]:
def mse(r, trainset_percentage):
    deviation = dataset - r
    mse_value = (deviation**2).mean(axis=1)
    minimal_deviations = np.argmin(mse_value)
    
    return np.ceil(minimal_deviations / trainset_percentage)

In [13]:
def calculate(trainset_percentage, shuffle=False):
    dataset.clear()
    trainset.clear()
    labels.clear()
    
    for folder_idx, (path, dirs, files) in enumerate(os.walk(IMAGES_PATH)):
        if shuffle:
            random.shuffle(files)
            
        for file_idx, filename in enumerate(files):
            if filename.endswith(".pgm"):
                image = cv2.imread(os.path.join(path, filename))

                # Refactor this:
                if ((file_idx + 1) > trainset_percentage):
                    labels.append(folder_idx)
                    trainset.append(image.flatten())
                else:
                    dataset.append(image.flatten())

    exact_matches = np.apply_along_axis(mse, 1, trainset, trainset_percentage) == labels

    print('Accuracy [Shuffle: %s][Trainset percentage: %s]: %s' % (shuffle, trainset_percentage, np.sum(exact_matches) / len(labels)))
    

    

In [14]:
def main():
    percentages = range(5, 10)
    
    for percentage in percentages:
        calculate(percentage)
    
    for percentage in percentages:
        calculate(percentage, shuffle=True)

In [15]:
main()

Accuracy [Shuffle: False][Trainset percentage: 5]: 0.83
Accuracy [Shuffle: False][Trainset percentage: 6]: 0.85
Accuracy [Shuffle: False][Trainset percentage: 7]: 0.85
Accuracy [Shuffle: False][Trainset percentage: 8]: 0.9125
Accuracy [Shuffle: False][Trainset percentage: 9]: 0.975
Accuracy [Shuffle: True][Trainset percentage: 5]: 0.74
Accuracy [Shuffle: True][Trainset percentage: 6]: 0.79375
Accuracy [Shuffle: True][Trainset percentage: 7]: 0.8916666666666667
Accuracy [Shuffle: True][Trainset percentage: 8]: 0.9125
Accuracy [Shuffle: True][Trainset percentage: 9]: 0.95
