# Age Estimation using SVM and LBP features
In this project, we will train an SVM to predict the age of a person using LBP features computed on the face image. We will use 

## Data
We will be using the IMDB Wiki dataset (https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/) for this project. We have already cleaned up the dataset to remove the non-face images as well as images with erroneous ages (age > 100 or age < 0).

In [18]:
import numpy as np
import scipy.io as io
import cv2
import os
from datetime import date, datetime
from skimage import feature
import numpy as np
from sklearn.svm import LinearSVR
import glob
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import mean_absolute_error
import numpy as np
from sklearn.decomposition import PCA
import tqdm
import random
import math

# Load the Data

In [12]:
def retrieve_image_date(mat_contents):
    full_image_paths = mat_contents['imdb'][0][0][2][0]
    dobs = mat_contents['imdb'][0][0][0][0]
    years_taken = mat_contents['imdb'][0][0][1][0]

    dict_data = {}
    
    for dobs, years_taken, img_path in zip(dobs, years_taken, full_image_paths):
        img_path = img_path[0].split('/')[1]
        dict_data[img_path] = years_taken - datetime.fromordinal(int(dobs)).year
        
    return dict_data

In [None]:
def get_data_labels():
    image_path = "imdb_crop_resize"
    data = []
    labels = []
    image_paths = glob.glob(os.path.join(image_path, '*.jp*g'))
    random.shuffle(image_paths)
    for image_path in tqdm.tqdm(image_paths):
        # load the image, convert it to grayscale, and describe it
        image = cv2.imread(image_path)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        hist = describe(gray)

        # extract the label from the image path, then update the label and data lists
        labels.append(int(dict_data[os.path.split(image_path)[1]]))
        data.append(hist)
    return data, labels

# LBP Features
Now, we will compute the Local Binary Pattern (LBP) features on a given image.

In [38]:
def describe(image, eps=1e-7):
        # compute the LBP representation of the image, and then use the LBP representation to build the histogram of patterns        
        lbp_image = feature.local_binary_pattern(image, 24, 4, method="uniform")
        w, h = lbp_image.shape
        
        block_size = 32
        num_blocks_w = int(w / block_size)
        num_blocks_h = int(h / block_size)

        hist_array = []
        
        # iterate over all blocks
        for i in range(num_blocks_h):
            for j in range(num_blocks_w):
                # this is the current block in the image 
                curr_block = lbp_image[i*block_size:(i+1)*block_size, j*block_size:(j+1)*block_size]
                
                # get it's histogram
                (hist, _) = np.histogram(curr_block, bins=np.arange(0, 17))
                
                # add it to the entire histogram of the image
                hist_array.append(hist)
        
        
        hist = np.array(hist_array)
        
        # make it flat (shape => [num_blocks_h*num_blocks_w*num_bins,])         
        # After this shape would be (1024,)
        hist = hist.ravel()
 
        # normalize the histogram
        hist = hist.astype("float")
        hist /= (hist.sum() + eps)
 
        # return the histogram of Local Binary Patterns
        return hist

# Grid Search
Let's choose the best set of parameters for our SVM by running a grid search.

In [47]:
def get_best_parameters(X_train, y_train, X_test, y_test):
    C_2d_range = [1e-2, 1, 1e2]
    gamma_2d_range = [1e-1, 1, 1e1]
    best_score = 0
    best_C = 1 
    best_gamma = 1
    best_classifier = None
    for C in C_2d_range:
        for gamma in gamma_2d_range:
            clf = svm.SVR(C=C, gamma=gamma).fit(X_train, y_train)
            score = clf.score(X_test, y_test)
            if (score > best_score):
                best_score = score
                best_gamma = gamma
                best_C = C
                best_classifier = clf
    return best_score, best_gamma, best_C, best_classifier

In [48]:
def get_error(classifier, X_test, y_test):
    prediction = classifier.predict(X_test)
    error = mean_absolute_error(y_test, prediction)
    return error        

In [49]:
def pca_transform(data):
    pca = PCA(n_components=512)
    pca.fit(data)
    data_pca = pca.transform(data)
    return data_pca

In [None]:
mat_contents = io.loadmat("imdb.mat")
dict_data = retrieve_image_date(mat_contents)
data, labels = get_data_labels()

100%|██████████| 460723/460723 [5:06:40<00:00, 25.04it/s]   


In [44]:
X_train, X_test, y_train, y_test = train_test_split (np.array(data), labels, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split (X_train, y_train, test_size=0.2, random_state=42)
score, gamma, C, classifier = get_best_parameters(X_train, y_train, X_val, y_val)
error = get_error(classifier, X_test, y_test)
print("Error Before PCA = ", error)

Error Before PCA =  10.005437032266032


In [45]:
pca_transform_data = pca_transform(np.array(data))
X_train, X_test, y_train, y_test = train_test_split (pca_transform_data, labels, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split (X_train, y_train, test_size=0.2, random_state=42)
score, gamma, C, classifier = get_best_parameters(X_train, y_train, X_val, y_val)
error = get_error(classifier, X_test, y_test)
print("Error After PCA = ", error)

Error After PCA =  10.00480897514508


# Result
we can see that, even after reducing 50% of features by doing PCA, we are able to achieve the same average error while having a much lower dimensional support vector machine.