# Install Packages

In [195]:
import argparse
import cv2
import numpy as np 
import os
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt
from sklearn import svm, datasets
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
from sklearn.metrics.pairwise import chi2_kernel
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

import h5py
import glob
import warnings
from matplotlib import pyplot
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

warnings.filterwarnings('ignore')

In [196]:
data_dir = '/home/18rvb/ELEC872/DisguiseFaceRecognition/TrainTestDataset'

# Global Features (Color Histograms, Haralick Textures, Hu Moments)

In [197]:
# feature-descriptor-1: Hu Moments
bins = 8

def fd_hu_moments(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    feature = cv2.HuMoments(cv2.moments(image)).flatten()
    return feature

# # feature-descriptor-2: Haralick Texture
# def fd_haralick(image):
#     # convert the image to grayscale
#     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#     # compute the haralick texture feature vector
#     haralick = mahotas.features.haralick(gray).mean(axis=0)
#     # return the result
#     return haralick

# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
    # convert the image to HSV color-space
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # compute the color histogram
    hist  = cv2.calcHist([image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    # normalize the histogram
    cv2.normalize(hist, hist)
    # return the histogram
    return hist.flatten()

# Function to read images

In [198]:
def read_imgs(img_path, fixed_size=tuple((500, 500))):
    image = pyplot.imread(img_path)
    image = cv2.resize(image, fixed_size)
    image_new = np.array(image, dtype=np.uint8)
#     image_new = image.astype('uint8')
    gray_img = cv2.cvtColor(image_new, cv2.COLOR_BGR2GRAY)
    return image_new

# Load images from training directory and create labels

In [199]:
train_path = data_dir + '/train'

disguised_paths = []
original_paths = []
disguised_labels = []
original_labels = []

import os.path
for path, directories, files in os.walk(train_path + '/Disguise'):
        for file in files:
            disguised_paths.append(os.path.join(path, file))
            disguised_labels.append(0)
            
for path, directories, files in os.walk(train_path + '/Original'):
        for file in files:
            original_paths.append(os.path.join(path, file))
            original_labels.append(1)

In [200]:
test_path = data_dir + '/val'

test_disguised_paths = []
test_original_paths = []
test_disguised_labels = []
test_original_labels = []

import os.path
for path, directories, files in os.walk(test_path + '/Disguise'):
        for file in files:
            test_disguised_paths.append(os.path.join(path, file))
            test_disguised_labels.append(0)
            
for path, directories, files in os.walk(test_path + '/Original'):
        for file in files:
            test_original_paths.append(os.path.join(path, file))
            test_original_labels.append(1)

In [201]:
labels = []
img_paths = []

labels.extend(disguised_labels)
labels.extend(original_labels)
labels.extend(test_disguised_labels)
labels.extend(test_original_labels)

img_paths.extend(disguised_paths)
img_paths.extend(original_paths)
img_paths.extend(test_disguised_paths)
img_paths.extend(test_original_paths)

In [202]:
print(np.array(labels).shape)

(4343,)


In [205]:
global_features = []
# errored_imgs = []
labels_final = []

for (num, x), lbls in zip(enumerate(img_paths), labels):
#     try:
    if num not in errored_imgs:
        image = read_imgs(x)
        fv_hu_moments = fd_hu_moments(image)
        fv_histogram  = fd_histogram(image)
        labels_final.append(lbls)
#     except:
#         errored_imgs.append(num)

        global_feature = np.hstack([fv_histogram, fv_hu_moments])
        global_features.append(global_feature)

print("[STATUS] feature vector size {}".format(np.array(global_features).shape))

[STATUS] feature vector size (3945, 519)


In [206]:
from sklearn.preprocessing import MinMaxScaler

In [207]:
# scale features in the range (0-1)
scaler            = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(global_features)

In [208]:
print(np.array(rescaled_features).shape)
print(np.array(labels_final).shape)

(3945, 519)
(3945,)


In [209]:
num_trees = 100
test_size = 0.10
seed      = 9

(trainDataGlobal, testDataGlobal, trainLabelsGlobal, testLabelsGlobal) = train_test_split(np.array(rescaled_features),
                                                                                          np.array(labels_final),
                                                                                          test_size=test_size,
                                                                                          random_state=seed)

In [210]:
print("[STATUS] splitted train and test data...")
print("Train data  : {}".format(trainDataGlobal.shape))
print("Test data   : {}".format(testDataGlobal.shape))
print("Train labels: {}".format(trainLabelsGlobal.shape))
print("Test labels : {}".format(testLabelsGlobal.shape))

[STATUS] splitted train and test data...
Train data  : (3550, 519)
Test data   : (395, 519)
Train labels: (3550,)
Test labels : (395,)


In [211]:
models = []
models.append(('LR', LogisticRegression(random_state=seed)))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier(random_state=seed)))
models.append(('RF', RandomForestClassifier(n_estimators=num_trees, random_state=seed)))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(random_state=seed)))

scoring    = "accuracy"

In [212]:
results = []
names   = []

# 10-fold cross validation
for name, model in models:
    kfold = KFold(n_splits=10, random_state=seed)
    cv_results = cross_val_score(model, trainDataGlobal, trainLabelsGlobal, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

LR: 0.695211 (0.018072)
LDA: 0.682254 (0.018116)
KNN: 0.686197 (0.024789)
CART: 0.697746 (0.023162)
RF: 0.737183 (0.018730)
NB: 0.513803 (0.017465)
SVM: 0.700845 (0.020461)
