# Faces Vs. Non-Faces

Load the new dataset

In [1]:
import DatasetSplitter

# Each splitter call returns four values, unpacked here as
# (training data, training labels, testing data, testing labels).
# The numeric suffix on the variable names matches the number of non-face
# training images named in the per-size sections further down.
training_data_50, training_labels_50, testing_data_50, testing_labels_50 = DatasetSplitter.splitDataNonFaces50()
training_data_100, training_labels_100, testing_data_100, testing_labels_100 = DatasetSplitter.splitDataNonFaces100()
# NOTE(review): the un-suffixed splitDataNonFaces() feeds the *_200 variables;
# presumably it is the 200-non-face variant -- confirm in DatasetSplitter.
training_data_200, training_labels_200, testing_data_200, testing_labels_200 = DatasetSplitter.splitDataNonFaces()
training_data_400, training_labels_400, testing_data_400, testing_labels_400 = DatasetSplitter.splitDataNonFaces400()

# Apply the LDA algorithm for two classes
We use only one eigenvector because LDA uses (number of classes - 1) eigenvectors, and here there are only two classes (faces and non-faces).

In [2]:
# The following code implements Linear Discriminant Analysis (LDA) for the two-class case only.
# The between-class scatter matrix Sb is the only difference from the code in LDA.ipynb.

import numpy as np
import scipy as sp
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

def get_split_data(training_data, first_class_size=200):
    """Split the stacked training matrix into its two per-class matrices.

    The rows of ``training_data`` are expected to be grouped by class: the
    first ``first_class_size`` rows form one class and every remaining row
    forms the other.
    NOTE(review): presumably the leading rows are the face images and the
    rest are the non-face images -- confirm against DatasetSplitter.

    Args:
        training_data: Array-like with one sample per row.
        first_class_size: Number of leading rows that belong to the first
            class. Defaults to 200, the boundary that used to be hard-coded.

    Returns:
        A two-element list ``[first_class_rows, second_class_rows]`` of
        NumPy arrays. The second array is empty when ``training_data`` has
        no more than ``first_class_size`` rows.

    Raises:
        ValueError: If ``first_class_size`` is negative (a negative slice
            bound would silently move rows into the wrong class).
    """
    if first_class_size < 0:
        raise ValueError("first_class_size must be non-negative")
    mat = np.array(training_data)
    return [mat[:first_class_size], mat[first_class_size:]]

def get_class_means(split_data):
    """Compute the mean sample (column-wise mean) of every class matrix.

    Args:
        split_data: Iterable of per-class matrices, one sample per row.

    Returns:
        A list holding one mean vector per class, in the same order as
        ``split_data``.
    """
    return [np.mean(samples, axis=0) for samples in split_data]

def get_centered_classes(split_data, class_means):
    """Subtract each class's mean vector from every sample of that class.

    Args:
        split_data: Iterable of per-class matrices, one sample per row.
        class_means: Iterable of mean vectors, paired positionally with
            ``split_data``.

    Returns:
        A list with one entry per class; each entry is a list of the
        mean-centred rows of that class.
    """
    return [
        [sample - mean for sample in samples]
        for samples, mean in zip(split_data, class_means)
    ]

def get_between_class(class_means):
    """Build the two-class between-class scatter matrix Sb.

    Sb is the outer product of the difference between the first two class
    means with itself.

    Args:
        class_means: Sequence whose first two items are the class mean
            vectors.

    Returns:
        A square NumPy array whose side equals the length of a mean vector.
    """
    first_mean, second_mean = class_means[0], class_means[1]
    mean_gap = first_mean - second_mean
    # np.outer flattens its arguments, so no explicit transpose is needed.
    return np.outer(mean_gap, mean_gap)

def get_within_class(centered_classes):
    """Return the pseudo-inverse of the within-class scatter matrix Sw.

    Sw is the sum, over all classes, of ``X.T @ X`` where ``X`` holds the
    mean-centred samples of one class (one sample per row). Despite the
    function's name, the value returned is the Moore-Penrose pseudo-inverse
    of Sw, not Sw itself.

    The feature dimension is taken from the data instead of being fixed to
    10304, so the function works for any sample length.

    Args:
        centered_classes: Iterable of per-class collections of mean-centred
            sample vectors. Classes without any rows are ignored.

    Returns:
        A square NumPy array of side ``n_features`` containing ``pinv(Sw)``.

    Raises:
        ValueError: If no class contains any sample, so the feature
            dimension cannot be determined.
    """
    # Imported explicitly: a bare ``import scipy`` is not guaranteed to make
    # the ``linalg`` submodule available on older SciPy releases.
    from scipy.linalg import pinv

    class_matrices = []
    for centered_class in centered_classes:
        mat = np.atleast_2d(np.array(centered_class, dtype=float))
        if mat.size:  # an empty class adds nothing to the scatter
            class_matrices.append(mat)
    if not class_matrices:
        raise ValueError("centered_classes must contain at least one sample")

    n_features = class_matrices[0].shape[1]
    Sw = np.zeros((n_features, n_features))
    for mat in class_matrices:
        Sw += np.dot(mat.T, mat)
    return pinv(Sw)

def get_eigens(Sw_inverse, Sb):
    """Eigen-decompose the product ``Sw_inverse . Sb``.

    Args:
        Sw_inverse: (Pseudo-)inverse of the within-class scatter matrix.
        Sb: Between-class scatter matrix.

    Returns:
        A tuple ``(eigenvalues, eigenvectors)`` as produced by
        ``np.linalg.eig``; eigenvectors are stored column-wise and the
        eigenvalues are in no particular order.
    """
    scatter_product = np.dot(Sw_inverse, Sb)
    values, vectors = np.linalg.eig(scatter_product)
    return values, vectors

def get_U(eigenvalues, eigenvectors):
    """Pick the eigenvector that belongs to the largest eigenvalue.

    Args:
        eigenvalues: 1-D array of eigenvalues.
        eigenvectors: 2-D array whose columns are the matching eigenvectors.

    Returns:
        The real part of the leading eigenvector, kept as a single-column
        2-D array so it can be used directly as a projection matrix.
    """
    descending_order = np.argsort(eigenvalues)[::-1]
    leading = descending_order[:1]  # one direction is enough for two classes
    return np.real(eigenvectors[:, leading])

In [3]:
def LDA(training_data, training_labels, testing_data, testing_labels):
    """Run two-class LDA followed by a 1-nearest-neighbour classifier.

    The training samples are split into two classes, a single projection
    direction is derived from the class scatter matrices, both data sets are
    projected onto it, and a 1-NN classifier fitted on the projected
    training data is scored on the projected testing data.

    Args:
        training_data: Training samples, one per row.
        training_labels: Labels for ``training_data``.
        testing_data: Testing samples, one per row.
        testing_labels: Labels for ``testing_data``.

    Returns:
        None. The accuracy, as a percentage, is printed.
    """
    # Derive the one-dimensional discriminant direction.
    per_class = get_split_data(training_data)
    means = get_class_means(per_class)
    sw_inverse = get_within_class(get_centered_classes(per_class, means))
    values, vectors = get_eigens(sw_inverse, get_between_class(means))
    direction = get_U(values, vectors)

    # Project both sets onto that direction.
    train_projected = np.dot(training_data, direction)
    test_projected = np.dot(testing_data, direction)

    # Classify with the single nearest neighbour and score the predictions.
    knn = KNeighborsClassifier(1)
    knn.fit(train_projected, training_labels)
    accuracy = accuracy_score(testing_labels, knn.predict(test_projected))

    print ("Accuracy = ", accuracy * 100)


# Using 50 non-face images for training

In [4]:
# Evaluate two-class LDA + 1-NN on the split loaded for 50 non-face training images; prints the accuracy.
LDA(training_data_50, training_labels_50, testing_data_50, testing_labels_50)

Accuracy =  91.25


# Using 100 non-face images for training

In [5]:
# Evaluate two-class LDA + 1-NN on the split loaded for 100 non-face training images; prints the accuracy.
LDA(training_data_100, training_labels_100, testing_data_100, testing_labels_100)

Accuracy =  93.5


# Using 200 non-face images for training

In [6]:
# Evaluate two-class LDA + 1-NN on the split loaded for 200 non-face training images; prints the accuracy.
LDA(training_data_200, training_labels_200, testing_data_200, testing_labels_200)

Accuracy =  89.5


# Using 400 non-face images for training

In [7]:
# Evaluate two-class LDA + 1-NN on the split loaded for 400 non-face training images; prints the accuracy.
LDA(training_data_400, training_labels_400, testing_data_400, testing_labels_400)

Accuracy =  92.75
