In [1]:
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from libsvm.svmutil import svm_train, svm_predict

In [2]:
def image_to_vector(image_path):
    image = Image.open(image_path)
    image_data = np.asarray(image)
    return image_data.flatten()

In [3]:
np.random.seed(6)

fig_size = (32, 32)
vector_dimension = fig_size[0] * fig_size[1]

choice = [i * 7 % 68 for i in range(1, 25 + 1)]
choice = [0] + choice # Append the selfish part
pic_root_folder = "PIE"
selfish_folder = "0"
train_set_name, test_set_name = [], []
for i in choice:
    images = [f'{i}/{filename}' for filename in os.listdir(f'{pic_root_folder}/{i}')]
    np.random.shuffle(images)
    train_size = int(len(images) * 0.7)
    train_set_name += images[:train_size]
    test_set_name += images[train_size:]

# Generate the label for each set
train_set_label = np.array([int(i.split('/')[0]) for i in train_set_name])
test_set_label = np.array([int(i.split('/')[0]) for i in test_set_name])

# Read and transform image data
train_set_data = np.zeros((len(train_set_name), vector_dimension))
test_set_data = np.zeros((len(test_set_name), vector_dimension))
for i in range(len(train_set_name)):
    train_set_data[i, :] = image_to_vector(f'{pic_root_folder}/{train_set_name[i]}')
for i in range(len(test_set_name)):
    test_set_data[i, :] = image_to_vector(f'{pic_root_folder}/{test_set_name[i]}')

In [4]:
class PCA:
    def __init__(self, X):
        self.mean = np.mean(X, axis=0)
        X_d_n = X - self.mean
        U, S, V_t = np.linalg.svd(X_d_n)
        index = np.argsort(S)[::-1]
        self.V = V_t.T[:, index]

    def transform(self, X, dimension):
        X_d_n = X - self.mean
        return X_d_n @ self.V[:,:dimension]

In [5]:
pca = PCA(train_set_data)

In [6]:
table_rows = []
for d in [80, 200]:
    X_train = pca.transform(train_set_data, d)
    X_test = pca.transform(test_set_data, d)
    for c in [1e-2, 1e-1, 1]:
        print(f'Dimension: {d}, C: {c}')
        model = svm_train(train_set_label, X_train, f'-t 0 -c {c}')
        _, p_acc, _ = svm_predict(test_set_label, X_test, model)
        table_rows.append([d, c, p_acc[0]])
        print('-'*60)


Dimension: 80, C: 0.01
Accuracy = 98.1581% (1279/1303) (classification)
------------------------------------------------------------
Dimension: 80, C: 0.1
Accuracy = 98.1581% (1279/1303) (classification)
------------------------------------------------------------
Dimension: 80, C: 1
Accuracy = 98.1581% (1279/1303) (classification)
------------------------------------------------------------
Dimension: 200, C: 0.01
Accuracy = 98.6186% (1285/1303) (classification)
------------------------------------------------------------
Dimension: 200, C: 0.1
Accuracy = 98.6186% (1285/1303) (classification)
------------------------------------------------------------
Dimension: 200, C: 1
Accuracy = 98.6186% (1285/1303) (classification)
------------------------------------------------------------


In [7]:
from prettytable import PrettyTable
table = PrettyTable()
table.field_names = ['Dimension', 'C', 'Accuracy']
table.add_rows(table_rows)
print(table)

+-----------+------+-------------------+
| Dimension |  C   |      Accuracy     |
+-----------+------+-------------------+
|     80    | 0.01 | 98.15809669992326 |
|     80    | 0.1  | 98.15809669992326 |
|     80    |  1   | 98.15809669992326 |
|    200    | 0.01 | 98.61857252494244 |
|    200    | 0.1  | 98.61857252494244 |
|    200    |  1   | 98.61857252494244 |
+-----------+------+-------------------+
