In [None]:
import os
import matplotlib.pyplot as plt
import cv2
import pandas as pd
import tensorflow as tf
import numpy as np

# Load every image of every mask-wearing class (one sub-folder per class),
# recording its file name, original size, pre-processed pixels and class label.
DATASET_DIR = "ML Datasets - (3 classes)"
CROP_FRACTION = 0.5        # fraction of the image kept by the central crop
TARGET_SIZE = (256, 256)   # (width, height) handed to cv2.resize

# Sorted so the row order of `data` does not depend on the OS listing order;
# stray non-directory entries are skipped.
masks_folders_names = sorted(
    name for name in os.listdir(DATASET_DIR)
    if os.path.isdir(os.path.join(DATASET_DIR, name))
)
imgFileName = []
width = []
height = []
croppedResizedGrayImg = []
targetClass = []

for masks_folder_name in masks_folders_names:
    print(masks_folder_name)
    folder_path = os.path.join(DATASET_DIR, masks_folder_name)
    loaded_count = 0
    for fname in sorted(os.listdir(folder_path)):
        img_path = os.path.join(folder_path, fname)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot be decoded
            print("Skipping unreadable file: " + img_path)
            continue
        imgFileName.append(fname)
        width.append(img.shape[1])
        height.append(img.shape[0])
        # central crop -> resize -> grayscale (imread yields BGR channel order)
        cropped = np.asarray(tf.image.central_crop(img, central_fraction=CROP_FRACTION))
        resized = cv2.resize(cropped, TARGET_SIZE)
        croppedResizedGrayImg.append(cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY))
        targetClass.append(masks_folder_name)
        loaded_count += 1
    # one summary line per class instead of one line per file
    print("  loaded " + str(loaded_count) + " images")

data = pd.DataFrame({'imgfileName': imgFileName, 'width': width, 'height': height, 'croppedResizedGrayImg': croppedResizedGrayImg, 'targetClass': targetClass})

In [None]:
# Upsample every class except 'Mask Incorrectly Worn' by adding three flipped
# copies of each image.
# Only rows that carry a file name (i.e. images loaded from disk) are used as the
# source, so re-running this cell does not augment already-augmented rows.
original_data = data[data['imgfileName'].notna()]

rotatedGrayImg = []  # holds the flipped copies (images are flipped, not rotated)
targetClass = []
for img, label in zip(original_data['croppedResizedGrayImg'], original_data['targetClass']):
    if label != 'Mask Incorrectly Worn':
        # cv2.flip codes: 0 = around the x-axis, 1 = around the y-axis, -1 = around both
        for flip_code in (0, 1, -1):
            rotatedGrayImg.append(cv2.flip(img, flip_code))
            targetClass.append(label)

# Augmented rows have no source file, so file name / size are placeholders
augmt_data = pd.DataFrame({'imgfileName': None, 'width': 0, 'height': 0, 'croppedResizedGrayImg': rotatedGrayImg, 'targetClass': targetClass})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent
data = pd.concat([original_data, augmt_data], ignore_index=True)

In [None]:
# plt.imshow(data.loc[data['imgfileName'] == '0017_MRTN_DRNV_0045.JPG', 'croppedResizedGrayImg'].values[0])

In [None]:
# Unroll every 2-D grayscale image into a single 1-D pixel vector
flattenImg = [gray_img.flatten() for gray_img in data['croppedResizedGrayImg']]

# One row per image, one column per pixel; the class label is stored last
# as its integer category code
label_codes = data['targetClass'].astype('category').cat.codes
dataset = pd.DataFrame(flattenImg)
dataset['targetClass'] = label_codes

In [47]:
import time
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import KFold 
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import KernelPCA
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Separate the data into independent variables (pixels) and the target variable
x = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]

# Fixed seed so the fold partition and the tree-based models are reproducible
SEED = 42

# Scale to [0, 1] and perform kernel PCA for dimensionality reduction.
# NOTE(review): the scaler and KPCA are fitted on the full data set before the
# cross-validation split, and the flipped copies of an image can fall into both
# the train and the test fold; both may make the scores below optimistic.
# NOTE(review): gamma=10 looks very large for min-max-scaled, high-dimensional
# pixel vectors (the RBF kernel may be close to zero for every pair of distinct
# images); the single-class SVC predictions are consistent with that -
# TODO confirm and consider tuning gamma.
scaler = MinMaxScaler().fit(x)
x = scaler.transform(x)
x = pd.DataFrame(x)
kpca = KernelPCA(kernel='rbf', fit_inverse_transform=True, gamma=10)
kpca_fit = kpca.fit(x)
x = kpca_fit.transform(x)
x = pd.DataFrame(x)

# 10-fold cross-validation on shuffled data; with an integer random_state every
# call to kf.split() yields the same folds, so all models see identical partitions
kf = KFold(n_splits=10, shuffle=True, random_state=SEED)

# initialize models
models = {
    'SVC' : svm.SVC(kernel='linear'),
    'DT': DecisionTreeClassifier(random_state=SEED),
    'RF': RandomForestClassifier(random_state=SEED),
    'KNN': KNeighborsClassifier()
}

metrics = ['accuracy', 'precision', 'recall', 'f1score', 'trained time']
models_performance = {model: {metric: [] for metric in metrics} for model in models.keys()}

for model_type, model_obj in models.items():
    print(model_type)
    for train_index, test_index in kf.split(x):
        # partition the data into train and test sets for this fold;
        # the indices from kf.split are positional, hence .iloc for both x and y
        x_train, x_test = x.iloc[train_index, :], x.iloc[test_index, :]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # fit the model on the training data and record how long fitting took
        start_time = time.perf_counter()
        model_obj.fit(x_train, y_train)
        models_performance[model_type]['trained time'].append(time.perf_counter() - start_time)

        # predict on the testing data
        y_pred = model_obj.predict(x_test)

        # store the scores obtained in this fold; a class that is never predicted
        # contributes 0 (zero_division=0) instead of relying on suppressed warnings
        models_performance[model_type]['accuracy'].append(accuracy_score(y_test, y_pred))
        models_performance[model_type]['precision'].append(precision_score(y_test, y_pred, average='weighted', zero_division=0))
        models_performance[model_type]['recall'].append(recall_score(y_test, y_pred, average='weighted', zero_division=0))
        models_performance[model_type]['f1score'].append(f1_score(y_test, y_pred, average='weighted', zero_division=0))

        print(confusion_matrix(y_test, y_pred, labels=[0, 1, 2]))
    
    

SVC
[[  0  55   0]
 [  0 107   0]
 [  0  40   0]]
[[  0  61   0]
 [  0 101   0]
 [  0  40   0]]
[[ 0 69  0]
 [ 0 96  0]
 [ 0 37  0]]
[[ 0 63  0]
 [ 0 91  0]
 [ 0 48  0]]
[[ 0 64  0]
 [ 0 97  0]
 [ 0 41  0]]
[[ 0 56  0]
 [ 0 92  0]
 [ 0 54  0]]
[[ 0 64  0]
 [ 0 87  0]
 [ 0 51  0]]
[[  0  54   0]
 [  0 103   1]
 [  0  43   0]]
[[ 0 54  0]
 [ 0 99  0]
 [ 0 48  0]]
[[ 0 68  0]
 [ 0 83  0]
 [ 0 50  0]]
DT
[[41  5 11]
 [ 6 94  2]
 [ 5  2 36]]
[[47  8  6]
 [ 3 88  0]
 [11  2 37]]
[[55  3  2]
 [ 1 99  2]
 [15  1 24]]
[[58  4  8]
 [ 3 89  0]
 [ 7  0 33]]
[[45  6  8]
 [ 1 96  2]
 [ 9  1 34]]
[[56  5  8]
 [ 3 94  0]
 [ 6  1 29]]
[[50  7  4]
 [ 3 91  0]
 [ 7  2 38]]
[[51  1  5]
 [ 3 95  2]
 [10  0 34]]
[[42  1  7]
 [ 7 90  0]
 [17  3 34]]
[[46  8 10]
 [ 1 82  0]
 [15  2 37]]
RF
[[56  2  5]
 [ 2 92  1]
 [17  1 26]]
[[53 10  9]
 [ 4 87  0]
 [17  0 22]]
[[54  4  4]
 [ 1 91  0]
 [16  3 29]]
[[46  8  6]
 [ 2 97  1]
 [12  1 29]]
[[44  7  9]
 [ 5 92  0]
 [10  2 33]]
[[50  4  7]
 [ 3 90  1]
 [13  1 33]]
[

In [46]:
# Get the overall performance score of the models (average the scores obtained in each k-fold iteration)
from statistics import mean 

# Start with an empty slot for every (model, metric) pair
models_overall_performance = {
    model_name: dict.fromkeys(metrics) for model_name in models
}

# Score metrics are reduced to their mean over the folds (3 decimals);
# 'trained time' is reported as the total fitting time over all folds.
for model_name, per_metric_scores in models_performance.items():
    summary = models_overall_performance[model_name]
    for metric_name, fold_scores in per_metric_scores.items():
        is_timing = metric_name == 'trained time'
        summary[metric_name] = (
            str(round(sum(fold_scores), 2)) + ' seconds'
            if is_timing
            else round(mean(fold_scores), 3)
        )

models_overall_performance

{'SVC': {'accuracy': 0.474,
  'precision': 0.227,
  'recall': 0.474,
  'f1score': 0.306,
  'trained time': '63.98 seconds'},
 'DT': {'accuracy': 0.857,
  'precision': 0.859,
  'recall': 0.857,
  'f1score': 0.857,
  'trained time': '50.79 seconds'},
 'RF': {'accuracy': 0.838,
  'precision': 0.839,
  'recall': 0.838,
  'f1score': 0.835,
  'trained time': '86.62 seconds'},
 'KNN': {'accuracy': 0.316,
  'precision': 0.102,
  'recall': 0.316,
  'f1score': 0.154,
  'trained time': '0.3 seconds'}}

In [None]:
# Predict the class of a single image with the decision tree.
# Other sample images that can be tried:
# Mask Correctly Worn\0003_MRCW_SRGM_0000.jpg
# Mask Incorrectly Worn\0001_MRNN_NMDM_0000.jpg
# Path is relative to the working directory (same dataset folder the training
# images are read from) instead of an absolute, user-specific location.
test_path = os.path.join('ML Datasets - (3 classes)', 'Mask Correctly Worn', '0001_MRCW_DRNV_0000.jpg')
test_bgr = cv2.imread(test_path)
if test_bgr is None:
    # cv2.imread returns None rather than raising when the file is missing or unreadable
    raise FileNotFoundError("Could not read test image: " + test_path)

# Same pre-processing as the training images: central crop -> resize -> grayscale
test = cv2.cvtColor(cv2.resize(np.asarray(tf.image.central_crop(test_bgr, central_fraction=0.5)), (256, 256)), cv2.COLOR_BGR2GRAY)

# Display inline; cv2.imshow/waitKey opens a native window and blocks the kernel
plt.imshow(test, cmap='gray')
plt.title('gray')
plt.axis('off')
plt.show()

# Flatten to a single-row feature matrix and apply the fitted scaler and KPCA
test = test.flatten().reshape(1, -1)
test = scaler.transform(test)
test = kpca_fit.transform(test)
# models['DT'] holds the tree as fitted in the last cross-validation fold
models['DT'].predict(test)