In [1]:
from PIL import Image
import glob
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import scipy.spatial.distance as distance
from sklearn import preprocessing
from sklearn.svm import LinearSVC

In [2]:
img_dir_list = ['PIE/1/*.jpg', 'PIE/4/*.jpg', 'PIE/5/*.jpg', \
               'PIE/19/*.jpg', 'PIE/21/*.jpg', 'PIE/23/*.jpg', \
               'PIE/25/*.jpg', 'PIE/29/*.jpg', 'PIE/33/*.jpg', \
               'PIE/39/*.jpg', 'PIE/40/*.jpg', 'PIE/44/*.jpg', \
               'PIE/45/*.jpg', 'PIE/46/*.jpg', 'PIE/48/*.jpg', \
               'PIE/52/*.jpg', 'PIE/57/*.jpg', 'PIE/58/*.jpg', \
               'PIE/59/*.jpg', 'PIE/67/*.jpg']
image_list = []
image_own = []
train_list = []
train_own = []
test_list = []
test_own = []
image_label = []
own_label = [20] * 10

for i in range (0, 20):
    for filename in glob.glob(img_dir_list[i]):
        im = Image.open(filename)
        arr = np.array(im).flatten()
        image_list.append(arr)
        image_label.append(i)
for filename in glob.glob('own_image/*.jpg'):
    im = Image.open(filename)
    arr = np.array(im).flatten()
    image_own.append(arr)
    
image_list = np.asarray(image_list)
image_own = np.asarray(image_own)
image_label = np.asarray(image_label)
own_label = np.asarray(own_label)

train_list, test_list, train_label, test_label = train_test_split(image_list, image_label, test_size = 0.3)
train_list = np.asarray(train_list)
test_list = np.asarray(test_list)
train_label = np.asarray(train_label)
test_label = np.asarray(test_label)

train_own, test_own, train_own_label, test_own_label = train_test_split(image_own, own_label, test_size = 0.3)
train_own = np.asarray(train_own)
test_own = np.asarray(test_own)
train_own_label = np.asarray(train_own_label)
test_own_label = np.asarray(test_own_label)

random_indices = np.arange(0, train_list.shape[0])
np.random.shuffle(random_indices)
train_sample = train_list[random_indices[:500]]
train_sample_label = train_label[random_indices[:500]]
# print(train_sample)
train_sample = np.concatenate((train_sample, train_own))
train_sample_label = np.concatenate((train_sample_label, train_own_label))
test_list = np.concatenate((test_list, test_own))
test_label = np.concatenate((test_label, test_own_label))
# print(train_sample)
# print(train_own)

In [3]:
sc = preprocessing.StandardScaler()
train_sample_prep = sc.fit_transform(train_sample)
test_list_prep = sc.transform(test_list)

In [4]:
train_image_mean = (np.mean(train_sample_prep, axis = 0)).astype('float')
test_image_mean = (np.mean(test_list_prep, axis = 0)).astype('float')

In [5]:
def pca (image_list):
    image_mean = train_image_mean
    image_diff = image_list - image_mean
    image_diff = np.asarray(image_diff)
    covariance = np.cov(image_diff.T)
#     covariance = np.cov(image_list.T)
#         print(covariance.shape)
    eigenvalue, eigenvector = np.linalg.eig(covariance)
    idx = eigenvalue.argsort()[::-1]
    eigenvalue = np.real(eigenvalue[idx])
    eigenvector = np.real(eigenvector[:, idx])
    eigenvalue = np.asarray(eigenvalue)
    eigenvector = np.asarray(eigenvector)
    
    return eigenvalue, eigenvector

In [6]:
def get_eigen_pair(eigenvalue, eigenvector):
    eigen_pairs = [(eigenvalue[i], eigenvector[:,i]) for i in range(len(eigenvalue))]
    eigen_pairs.sort(key=lambda k: k[0], reverse = True)
    return eigen_pairs

def reduce_dim(image_list, image_mean, eigenvalue, eigenvector, dim):
    eigenpair = get_eigen_pair(eigenvalue, eigenvector)
    w = np.hstack((eigenpair[0][1][:, np.newaxis], eigenpair[1][1][:, np.newaxis]))
    if (dim > 2):
        for i in range (2, dim):
            w = np.hstack((w, eigenpair[i][1][:, np.newaxis]))
    reduce_dim_result = (image_list - image_mean).dot(w)
    return reduce_dim_result

In [7]:
eigenvalue_test, eigenvector_test = pca(test_list_prep)
eigenvalue_train, eigenvector_train = pca(train_sample_prep)

In [8]:
test_80_dim = reduce_dim(test_list_prep, train_image_mean, eigenvalue_train, eigenvector_train, 80)
train_80_dim = reduce_dim(train_sample_prep, train_image_mean, eigenvalue_train, eigenvector_train, 80)

for penalty_param in [0.01, 0.1, 1]:
    model = LinearSVC(C = penalty_param)
    model.fit(train_80_dim, train_sample_label)
    print("training accuracy:", model.score(train_80_dim, train_sample_label))
    print("test accuracy:", model.score(test_80_dim, test_label))

training accuracy: 0.9684418145956607
test accuracy: 0.9159335288367546




training accuracy: 1.0
test accuracy: 0.9335288367546432
training accuracy: 0.9861932938856016
test accuracy: 0.9266862170087976




In [9]:
test_200_dim = reduce_dim(test_list_prep, train_image_mean, eigenvalue_train, eigenvector_train, 200)
train_200_dim = reduce_dim(train_sample_prep, train_image_mean, eigenvalue_train, eigenvector_train, 200)

for penalty_param in [0.01, 0.1, 1]:
    model = LinearSVC(C = penalty_param)
    model.fit(train_200_dim, train_sample_label)
    print("training accuracy:", model.score(train_200_dim, train_sample_label))
    print("test accuracy:", model.score(test_200_dim, test_label))

training accuracy: 1.0
test accuracy: 0.9452590420332356




training accuracy: 1.0
test accuracy: 0.9364613880742912
training accuracy: 0.9960552268244576
test accuracy: 0.9305962854349951


