In [1]:
from glob import glob
from sklearn.datasets import fetch_lfw_people
from clamv import *

import os
import cv2 
import numpy as np

# Combine Datasets

In [2]:
# Load our dataset first 
y = []
X = []
for path in glob('../final_dataset/*/*'):
    _, _, y1, name = path.split('/')
    x1 = cv2.imread(path)
    
    y.append(y1)
    X.append(x1)

In [3]:
# Load lfw dataset
output = fetch_lfw_people(color=True, min_faces_per_person=25, resize=1)
lfw_X = output.images
lfw_y = output.target

lfw_names = output.target_names

In [4]:
clmv_dict = {}
for idx, val in enumerate(np.unique(y)):
    clmv_dict[val] = idx

In [5]:
names = np.concatenate([np.unique(y), lfw_names])

In [6]:
for idx, n in enumerate(names):
    names[idx] = n.title()

In [7]:
clmv_y = []
for n in y:
    clmv_y.append(clmv_dict[n])

In [8]:
combined_y = np.concatenate([clmv_y, lfw_y + len(np.unique(y))])

In [9]:
# Resize our X to right size
resizer = Resizer(lfw_X.shape[2], lfw_X.shape[1])

In [10]:
clmv_resized_X = []
for img in X:
    clmv_resized_X.append(resizer(img))

In [11]:
clmv_resized_X = np.array(clmv_resized_X)

In [14]:
final_X = np.concatenate([clmv_resized_X, lfw_X])

In [15]:
np.save('clmv_X.npy', final_X)
np.save('clmv_y.npy', combined_y)

In [16]:
np.save('clmv_names.npy', names)

In [35]:
X_yuv = []

rgb2yuv = FormatConverter(source='RGB', dest='YUV', flip_axis=True)
for x in final_X:
    X_yuv.append(rgb2yuv(x))
    
X_yuv = np.array(X_yuv)
X_yuv = X_yuv.reshape(len(X_yuv), 3, -1)

# Train a Classifier

In [36]:
X = X_yuv
y = combined_y

In [38]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

In [39]:
FisherFaces = Pipeline([
    ('pca', MultiDimensionalModel(PCA(n_components=150), dimensions=3)),
    ('lda', LDA()),
    ('classify', SVC(kernel='linear', class_weight='balanced'))
])

In [58]:
# Create holdout
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [59]:
FisherFaces.fit(X_train, y_train)

Pipeline(steps=[('pca', MultiDimensionalModel(dimensions=None,
           models=[PCA(copy=True, iterated_power='auto', n_components=150, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False), PCA(copy=True, iterated_power='auto', n_components=150, random_state=None,
  svd_solver='auto', tol=0.0, wh...,
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

In [60]:
from sklearn.metrics import accuracy_score
out = FisherFaces.predict(X_test)
accuracy_score(y_test, out)

0.79264705882352937

In [61]:
from sklearn.metrics import classification_report
print(classification_report(y_test, out, target_names=names))

                           precision    recall  f1-score   support

                     Brad       1.00      1.00      1.00         9
                    Piotr       1.00      1.00      1.00         5
                 Rhiannon       1.00      1.00      1.00         6
                   Sophie       1.00      1.00      1.00         7
                      Tom       1.00      1.00      1.00         5
         Alejandro Toledo       0.56      0.56      0.56         9
             Alvaro Uribe       0.60      0.43      0.50         7
             Andre Agassi       0.88      0.58      0.70        12
             Ariel Sharon       0.75      0.94      0.83        16
    Arnold Schwarzenegger       0.27      0.38      0.32         8
             Bill Clinton       0.60      0.43      0.50         7
             Colin Powell       0.76      0.91      0.83        65
            David Beckham       0.43      0.43      0.43         7
          Donald Rumsfeld       0.74      0.89      0.81     

# Now Test On Haar Cascades