# Principal Component Analysis (PCA)

Code from Chapter 1, Section 1.3 *PCA of Images* of [**Programming Computer Vision**](http://programmingcomputervision.com/) by Jan Erik Solem.

Code has been modified to work with Python 3.x and cleaned up to use the standard aliases for numpy and matplotlib.

In [None]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

import pickle

import imtools

In [None]:
def pca(X):
    """
    Principal Component Analysis.

    input: X, matrix with training data stored as flattened arrays in rows
    return: (V, S, mean_X) where
        V      -- projection matrix, one principal direction per row,
                  with the most important directions first
        S      -- singular values of the centered data in decreasing order
                  (the square roots of the eigenvalues of X.X^T, so their
                  squares are proportional to the variance per direction)
        mean_X -- mean over the rows of X, subtracted before the analysis
    """

    # get dimensions
    num_data, dim = X.shape

    # center data
    mean_X = X.mean(axis=0)
    X = X - mean_X

    if dim > num_data:
        # PCA - compact trick used: eigendecompose the small
        # (num_data x num_data) matrix X.X^T instead of the (dim x dim) one
        M = np.dot(X, X.T)
        e, EV = np.linalg.eigh(M)  # eigenvalues (ascending) and eigenvectors

        # map the small eigenvectors back to data space; reverse the rows
        # because eigh puts the most important directions last
        V = np.dot(X.T, EV).T[::-1]

        # round-off can push near-zero eigenvalues slightly below zero,
        # which would turn into NaN under the square root, so clip first
        S = np.sqrt(np.clip(e, 0, None))[::-1]

        # normalize each row by its singular value; rows whose singular
        # value is exactly zero carry no variance and are left as zeros
        # instead of being divided by zero
        scale = S[:, np.newaxis]
        V = np.divide(V, scale, out=np.zeros_like(V), where=scale > 0)
    else:
        # PCA - SVD used; the reduced form avoids building the full
        # (num_data x num_data) left factor, which is discarded anyway
        _, S, V = np.linalg.svd(X, full_matrices=False)
        V = V[:num_data]  # only makes sense to return the first num_data values

    # return the projection matrix, the singular values and the mean
    return V, S, mean_X

Load image filenames from directory

In [None]:
# Ask the project-local imtools helper for the image list of 'data/a_thumbs/'.
# NOTE(review): get_imlist is not shown here; the cells below index the result,
# take its len() and pass each entry to Image.open, so it is presumably a list
# of image file paths — confirm against imtools.
imlist = imtools.get_imlist('data/a_thumbs/')

Get image size and number

In [None]:
# Count the images, then read the first one to find its height and width.
# NOTE(review): later cells reshape every row to (m, n), so all images are
# assumed to have the same size as the first one — confirm.
imnbr = len(imlist)
im = np.array(Image.open(imlist[0]))
m, n = im.shape[:2]
print(f"{imnbr} images of size ({m}, {n})")

Create matrix to store all flattened images

In [None]:
# One row per image: open each entry, flatten its pixels, and stack the rows
# into a single-precision float matrix.
immatrix = np.array(
    [np.array(Image.open(path)).flatten() for path in imlist],
    dtype='f',
)

Perform PCA

In [None]:
# Run pca() on the flattened-image matrix and unpack its three results:
# the projection matrix, the second returned array (S), and the mean over
# the rows of immatrix (the mean image, still flattened).
V, S, immean = pca(immatrix)

Show some images (mean and first 7 modes)

In [None]:
# Draw a 2 x 4 grid in grayscale: the mean image followed by seven rows of V.
plt.figure()
plt.gray()

# panel 1: the mean image, reshaped back to (m, n)
plt.subplot(2, 4, 1)
plt.imshow(immean.reshape(m, n))
plt.axis('off')

# panels 2..8: rows 0..6 of V, each reshaped back to (m, n)
for i, panel in enumerate(range(2, 9)):
    plt.subplot(2, 4, panel)
    plt.axis('off')
    plt.imshow(V[i].reshape(m, n))

plt.show()

Save mean and principal components

In [None]:
# Write two consecutive pickles into one file: the mean first, then V.
# A reader has to call pickle.load twice, in that same order.
with open('font_pca_modes.pkl', 'wb') as f:
    for payload in (immean, V):
        pickle.dump(payload, f)