### 1. Testing the development environment

In [1]:
import cv2
import numpy as np
import dlib

# Checking the OpenCV version
print("OpenCV version", cv2.__version__)

# Checking the Numpy version
print("Numpy version", np.__version__)

# Checking the dlib version
print("Dlib version", dlib.__version__)




OpenCV version 3.1.0
Numpy version 1.13.1
Dlib version 19.6.1


### 2. Extracting Principal Components from an Image

We will be using the Yale Image dataset for this example

Link to the dataset : http://vismod.media.mit.edu/vismod/classes/mas622-00/datasets/

In [27]:
# Rendering PCA images

from PIL import Image
from numpy import *

def pca(X):
  # Principal Component Analysis
  # input: X, matrix with training data as flattened arrays in rows
  # return: projection matrix (with important dimensions first),
  # variance and mean

  #get dimensions
  num_data,dim = X.shape

  print(num_data, dim)

  #center data
  mean_X = X.mean(axis=0)

  print(mean_X.shape)
    
  #X = list(X)
  for i in range(num_data):
      X[i] -= mean_X

  if dim>100:
      print('PCA - compact trick used')
      M = dot(X,X.T) #covariance matrix
      e,EV = linalg.eigh(M) #eigenvalues and eigenvectors
      tmp = dot(X.T,EV).T #this is the compact trick
      V = tmp[::-1] #reverse since last eigenvectors are the ones we want
      S = sqrt(e)[::-1] #reverse since eigenvalues are in increasing order
  else:
      print('PCA - SVD used')
      U,S,V = linalg.svd(X)
      V = V[:num_data] #only makes sense to return the first num_data

  #return the projection matrix, the variance and the mean
  return V,S,mean_X

In [29]:
# Small Client Program for computing PCA
import glob
import os
import numpy as np
import matplotlib.image as mpimg

INPUT_IMG_DIR = os.path.join("..","..","datasets","yalefaces","yalefaces", "subject01*")

X = []
for image in glob.glob(INPUT_IMG_DIR):
    im = mpimg.imread(image)
    X.append(im.flatten())

X = np.array(X)

# Calculating the PCA of the above data
v,s,m = pca(X)



11 77760
(77760,)


TypeError: Cannot cast ufunc subtract output from dtype('float64') to dtype('uint8') with casting rule 'same_kind'

### 2. Eigen Face

In [3]:
class PCA:
    pass

In [4]:
import os
import glob
import numpy as np
from PIL import Image
import skimage.transform
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

%matplotlib inline

INPUT_IMG_DIR = os.path.join("..","..","datasets","yalefaces","yalefaces", "subject01*")
SIZE = 100 * 100    # w x h 

class EigenFace():
    
    def __init__(self, img_path,size=SIZE):
        self.image_bin = {}
        image_lst = [ image for image in glob.glob(img_path)]
        for image in image_lst:
            im = mpimg.imread(image)
            size = im.shape[0]*im.shape[1]
            
            # finding the downsample factor for the image
            d_sample_factor = size / SIZE
            im = skimage.transform.pyramid_reduce(im, downscale=d_sample_factor)
            
            # we will be storing the image and its pixel map in gray scale as a dictionary
            if len(im.shape) > 2:
                self.image_bin[image] = cv2.cvtColor(im,cv2.COLOR_RGB2GRAY)
            else:
                self.image_bin[image] = im
                
            # calculating the vector representation of the Images
        self.vector_matrix = self.get_vector()
            
            
                
    def get_vector(self):
        """
        Representing an Image Sample in the vector form and concatenating together
        """
        
        for idx, (_, image) in enumerate(self.image_bin.items()):
            if idx == 0:
                 vector_2d = self.image_bin[list(self.image_bin.keys())[0]].flatten()
            else:
                vector = image.flatten()
                vector_2d = np.concatenate((vector_2d.T, vector.T), axis=0)
        vector_2d = np.reshape(vector_2d, (len(self.image_bin), vector.size))
        return vector_2d
    
    
    def get_pca(self):
        """
        Getting the PCA of images
        """
        mean_vector = self.vector_matrix.mean(axis=0)
        for idx in range(self.vector_matrix.shape[0]):
            self.vector_matrix[idx] -= mean_vector
            
        u, s, eigen_vector = np.linalg.svd(np.dot(self.vector_matrix.T, self.vector_matrix))
        standard_deviation = s**2/float(len(s))
        variance_proportion = standard_deviation / np.sum(standard_deviation)
        pca = PCA()
        pca.s = s
        pca.eigen_vector = eigen_vector[:self.vector_matrix.shape[0]]
        pca.variance_proportion = variance_proportion
        pca.mean_vector = mean_vector
        return pca

### Principal Component Analysis

In [5]:
eigen_face = EigenFace(INPUT_IMG_DIR)

In [6]:
pca = eigen_face.get_pca()

In [14]:
print(pca.eigen_vector.shape)

(11, 1344)
