### Table of Contents

### 00. Load Data
Choose 25 subjects yourself and indicate them below:

In [1]:
import random

In [2]:
def get_random_list(num_items, start, end, set_seed=123):
    """Return a sorted, reproducible random selection of integers.

    Seeds the module-level ``random`` generator with ``set_seed``, shuffles
    the integers in ``range(start, end)`` (``end`` is exclusive) and keeps the
    first ``num_items`` entries of the shuffled sequence.

    Args:
        num_items: How many integers to keep. If it exceeds the number of
            candidates, every candidate is returned.
        start: First candidate integer (inclusive).
        end: Upper bound of the candidates (exclusive).
        set_seed: Seed passed to ``random.seed``.

    Returns:
        The chosen integers as a list in ascending order.
    """
    random.seed(set_seed)
    candidates = list(range(start, end))
    random.shuffle(candidates)
    chosen = candidates[:num_items]
    chosen.sort()
    return chosen

In [3]:
# Draw a reproducible (seeded) random subset of 25 subject indices.
# NOTE(review): get_random_list draws from range(start, end), so end=68 means
# subject 68 can never be selected — confirm whether that is intended.
selected_idx = get_random_list(num_items=25, start=1, end=68, set_seed=123)
print('Selected subjects:', selected_idx)

Selected subjects: [2, 8, 13, 14, 15, 17, 19, 20, 23, 24, 27, 31, 32, 33, 34, 40, 42, 44, 46, 48, 50, 52, 59, 63, 65]


In [4]:
# Number of images taken per selected subject and number of own photos.
num_img_per_subj = 170
num_selfies = 10
list_of_img_end_paths = []

# Relative paths of images 1..num_img_per_subj for every selected subject.
for subj_idx in selected_idx:
    list_of_img_end_paths += [
        'PIE//' + str(subj_idx) + '//' + str(img_no) + '.jpg'
        for img_no in range(1, num_img_per_subj + 1)
    ]

# Relative paths of the own photos, appended after the PIE images.
selfies_list = ['selfies//formatted//' + str(img_no) + '.jpg'
                for img_no in range(1, num_selfies + 1)]
list_of_img_end_paths += selfies_list

print('Number of selected images:', len(list_of_img_end_paths))

Number of selected images: 4260


### 01. Principal Component Analysis (PCA)

In [8]:
import numpy as np
import cv2
import os

In [9]:
# Randomly sample 500 images from the CMU PIE training set and your own photos.
# Indices are drawn over the combined path list (PIE paths followed by the
# selfie paths), so how many selfies end up in the sample is left to chance.
random_idx = get_random_list(num_items=500, start=0, end=len(list_of_img_end_paths), set_seed=123)
# Look up the relative image path for each sampled index.
random_500_img_end_paths = [list_of_img_end_paths[i] for i in random_idx]

In [30]:
# Build the data matrix: one row per sampled image, one column per pixel.
path = os.path.abspath('')

vectorised_rows = []
for end_paths in random_500_img_end_paths:
    # load image
    img_path = os.path.join(path, end_paths)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # rather than raising, so fail here with the offending path.
        raise FileNotFoundError('Could not read image: ' + img_path)

    # vectorise image: keep channel 0 of every pixel, flattened row by row
    array_size = img.shape[0]*img.shape[1]
    vectorised_img = img[:, :, 0].reshape(array_size).astype(int)

    # add to dataset
    vectorised_rows.append(vectorised_img)

# Stack once at the end instead of re-copying the growing matrix on every
# iteration.
data = np.stack(vectorised_rows, axis=0)

print('Vectorised and loaded data:', data.shape)

Vectorised and loaded data: (500, 1024)


In [32]:
from sklearn.decomposition import PCA

# Fit a 2-component PCA, with every row of `data` as one sample.
pca = PCA(n_components=2).fit(data)

# Fraction of variance explained per component, then the singular values.
print(pca.explained_variance_ratio_)
print(pca.singular_values_)

[0.41883119 0.26807402]
[23964.306498   19172.23390612]


In [33]:
# Refit on the transpose, i.e. every column of `data` is treated as a sample.
# NOTE(review): this rebinds `pca`, replacing the fit on `data` — confirm which
# orientation any later cell expects.
pca = PCA(n_components=2).fit(data.T)

# Fraction of variance explained per component, then the singular values.
print(pca.explained_variance_ratio_)
print(pca.singular_values_)

[0.36458246 0.26052398]
[19325.41883314 16336.34456095]


In [57]:
### Vectorise images [Workings] ###

In [84]:
%%time
# Reference implementation: visit every pixel in row-major order and append
# its channel-0 value to a growing 1-D integer array.
vectorised_img2 = np.empty(0, dtype=int)
for row in range(img.shape[0]):
    for col in range(img.shape[1]):
        vectorised_img2 = np.append(vectorised_img2, img[row, col, 0])

Wall time: 18.9 ms


In [89]:
%%time
# Vectorised alternative to a per-pixel loop: slice channel 0 of every pixel
# and flatten it row by row, with no Python-level iteration over pixels.
vectorised_img = img[:, :, 0].reshape(img.shape[0]*img.shape[1]).astype(int)

Wall time: 0 ns


In [93]:
# Check that the two vectorisations agree. The result is printed either way,
# so a mismatch shows up as False instead of producing no output at all.
print(np.array_equal(vectorised_img2, vectorised_img))

True


In [113]:
# Scratch check of row-wise concatenation: stack three copies of one flattened
# image. Kept in its own variable so the notebook-wide `data` matrix is not
# overwritten, and reshaped with -1 so the row length is not hard-coded.
concat_demo = np.concatenate((vectorised_img.reshape(1, -1), vectorised_img.reshape(1, -1)), axis=0)
concat_demo = np.concatenate((concat_demo, vectorised_img.reshape(1, -1)), axis=0)
concat_demo

array([[ 32,  39,  44, ..., 249, 255, 254],
       [ 32,  39,  44, ..., 249, 255, 254],
       [ 32,  39,  44, ..., 249, 255, 254]])

In [36]:
# Display the `data` array (NumPy abbreviates large arrays in the repr).
data

array([[135, 147, 147, ...,  40,  48,  54],
       [ 16,  44,  68, ...,  40,  54,  69],
       [110, 119, 109, ...,   6,   5,   5],
       ...,
       [ 55,  51,  57, ...,   7,   8,   8],
       [ 45,  48,  54, ...,  97,  96,  92],
       [199, 195, 191, ...,   5,   6,   3]])