## SVM Classification

In [1]:
from PIL import Image
import glob
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import scipy.spatial.distance as distance
from sklearn import preprocessing
from sklearn.svm import LinearSVC

Load image files from 'PIE' folder and 'own_image' folder.

The 20 subjects out of the entire data set are selected using a random number generator to ensure randomness and prevent any bias.

The images are splited into training and test sets. 

Since sampling is not mentioned in the instructions provided, the entire training set is used to train the model.

In [2]:
img_dir_list = ['PIE/1/*.jpg', 'PIE/4/*.jpg', 'PIE/5/*.jpg', \
               'PIE/19/*.jpg', 'PIE/21/*.jpg', 'PIE/23/*.jpg', \
               'PIE/25/*.jpg', 'PIE/29/*.jpg', 'PIE/33/*.jpg', \
               'PIE/39/*.jpg', 'PIE/40/*.jpg', 'PIE/44/*.jpg', \
               'PIE/45/*.jpg', 'PIE/46/*.jpg', 'PIE/48/*.jpg', \
               'PIE/52/*.jpg', 'PIE/57/*.jpg', 'PIE/58/*.jpg', \
               'PIE/59/*.jpg', 'PIE/67/*.jpg']
image_list = []
image_own = []
train_list = []
train_own = []
test_list = []
test_own = []
image_label = []
own_label = [20] * 10

for i in range (0, 20):
    for filename in glob.glob(img_dir_list[i]):
        im = Image.open(filename)
        arr = np.array(im).flatten()
        image_list.append(arr)
        image_label.append(i)
for filename in glob.glob('own_image/*.jpg'):
    im = Image.open(filename)
    arr = np.array(im).flatten()
    image_own.append(arr)
    
image_list = np.asarray(image_list)
image_own = np.asarray(image_own)
image_label = np.asarray(image_label)
own_label = np.asarray(own_label)

train_list, test_list, train_label, test_label = train_test_split(image_list, image_label, test_size = 0.3)
train_list = np.asarray(train_list)
test_list = np.asarray(test_list)
train_label = np.asarray(train_label)
test_label = np.asarray(test_label)

train_own, test_own, train_own_label, test_own_label = train_test_split(image_own, own_label, test_size = 0.3)
train_own = np.asarray(train_own)
test_own = np.asarray(test_own)
train_own_label = np.asarray(train_own_label)
test_own_label = np.asarray(test_own_label)

# random_indices = np.arange(0, train_list.shape[0])
# np.random.shuffle(random_indices)
# train_sample = train_list[random_indices[:500]]
# train_sample_label = train_label[random_indices[:500]]

# train_sample = np.concatenate((train_sample, train_own))
# train_sample_label = np.concatenate((train_sample_label, train_own_label))
test_list = np.concatenate((test_list, test_own))
test_label = np.concatenate((test_label, test_own_label))
train_list = np.concatenate((train_list, train_own))
train_label = np.concatenate((train_label, train_own_label))
# print(train_sample)
# print(train_own)

Preprocess the training and test images using StandardScaler in sklearn package

In [3]:
sc = preprocessing.StandardScaler()
# train_sample_prep = sc.fit_transform(train_sample)
train_list_prep = sc.fit_transform(train_list)
test_list_prep = sc.transform(test_list)

Calculate training image mean

In [4]:
train_image_mean = (np.mean(train_list_prep, axis = 0)).astype('float')
# test_image_mean = (np.mean(test_list_prep, axis = 0)).astype('float')

#### Implementation of PCA

Compute the covariance of the difference between each image and training image mean

Use numpy package to compute eigenvalue and eigenvector. Sort the eigenvalues and eigenvectors in descending order

In [5]:
def pca (image_list):
    image_mean = train_image_mean
    image_diff = image_list - image_mean
    image_diff = np.asarray(image_diff)
    covariance = np.cov(image_diff.T)
    
    eigenvalue, eigenvector = np.linalg.eig(covariance)
    idx = eigenvalue.argsort()[::-1]
    eigenvalue = np.real(eigenvalue[idx])
    eigenvector = np.real(eigenvector[:, idx])
    eigenvalue = np.asarray(eigenvalue)
    eigenvector = np.asarray(eigenvector)
    
    return eigenvalue, eigenvector

Construct a list of eigen pairs based on eigenvalues and eigenvectors

#### Reduction of Dimensionality 

Keeping first n eigen pairs from the list of eigen pairs. 

The result is the dot product of the difference between the input image list and training image mean.

In [6]:
def get_eigen_pair(eigenvalue, eigenvector):
    eigen_pairs = [(eigenvalue[i], eigenvector[:,i]) for i in range(len(eigenvalue))]
    eigen_pairs.sort(key=lambda k: k[0], reverse = True)
    return eigen_pairs

def reduce_dim(image_list, image_mean, eigenvalue, eigenvector, dim):
    eigenpair = get_eigen_pair(eigenvalue, eigenvector)
    w = np.hstack((eigenpair[0][1][:, np.newaxis], eigenpair[1][1][:, np.newaxis]))
    if (dim > 2):
        for i in range (2, dim):
            w = np.hstack((w, eigenpair[i][1][:, np.newaxis]))
    reduce_dim_result = (image_list - image_mean).dot(w)
    return reduce_dim_result

Compute eigenvalue and eigenvector of training data

In [7]:
eigenvalue_train, eigenvector_train = pca(train_list_prep)

Use raw face images to train linear SVM.

Output Accuracy for different penalty parameter

In [8]:
print("Raw face images used")
for penalty_param in [0.01, 0.1, 1]:
    model = LinearSVC(C = penalty_param)
    model.fit(train_list_prep, train_label)
    print("training accuracy for penalty param = " + str(penalty_param) + ": " + str(100 * model.score(train_list_prep, train_label)) + "%")
    print("test accuracy for penalty param = " + str(penalty_param) + ": " + str(100 * model.score(test_list_prep, test_label)) + "%")

Raw face images used
training accuracy for penalty param = 0.01: 100.0%
test accuracy for penalty param = 0.01: 98.24046920821115%




training accuracy for penalty param = 0.1: 100.0%
test accuracy for penalty param = 0.1: 98.53372434017595%
training accuracy for penalty param = 1: 100.0%
test accuracy for penalty param = 1: 98.63147605083088%




Use images with dimensionality equals to 80 to train SVM

Output Accuracy for different penalty parameter

In [9]:
test_80_dim = reduce_dim(test_list_prep, train_image_mean, eigenvalue_train, eigenvector_train, 80)
train_80_dim = reduce_dim(train_list_prep, train_image_mean, eigenvalue_train, eigenvector_train, 80)

print("Reduce dimensionality to 80")
for penalty_param in [0.01, 0.1, 1]:
    model = LinearSVC(C = penalty_param)
    model.fit(train_80_dim, train_label)
    print("training accuracy for penalty param = " + str(penalty_param) + ": " + str(100 * model.score(train_80_dim, train_label)) + "%")
    print("test accuracy for penalty param = " + str(penalty_param) + ": " + str(100 * model.score(test_80_dim, test_label)) + "%")

Reduce dimensionality to 80
training accuracy for penalty param = 0.01: 98.11478843736909%
test accuracy for penalty param = 0.01: 96.57869012707722%




training accuracy for penalty param = 0.1: 99.66485127775451%
test accuracy for penalty param = 0.1: 97.5562072336266%
training accuracy for penalty param = 1: 99.95810640971932%
test accuracy for penalty param = 1: 97.0674486803519%




Use images with dimensionality equals to 200 to train SVM

Output Accuracy for different penalty parameter

In [10]:
test_200_dim = reduce_dim(test_list_prep, train_image_mean, eigenvalue_train, eigenvector_train, 200)
train_200_dim = reduce_dim(train_list_prep, train_image_mean, eigenvalue_train, eigenvector_train, 200)

print("Reduce dimensionality to 200")
for penalty_param in [0.01, 0.1, 1]:
    model = LinearSVC(C = penalty_param)
    model.fit(train_200_dim, train_label)
    print("training accuracy for penalty param = " + str(penalty_param) + ": " + str(100 * model.score(train_200_dim, train_label)) + "%")
    print("test accuracy for penalty param = " + str(penalty_param) + ": " + str(100 * model.score(test_200_dim, test_label)) + "%")

Reduce dimensionality to 200
training accuracy for penalty param = 0.01: 99.66485127775451%
test accuracy for penalty param = 0.01: 98.1427174975562%




training accuracy for penalty param = 0.1: 100.0%
test accuracy for penalty param = 0.1: 98.72922776148583%
training accuracy for penalty param = 1: 100.0%
test accuracy for penalty param = 1: 98.63147605083088%


