In [1]:
from glob import glob
from sklearn.datasets import fetch_lfw_people
from clamv import *

import os
import cv2 
import numpy as np

# Combine Datasets

In [2]:
# Load our dataset first 
y = []
X = []
for path in glob('../final_dataset/*/*'):
    _, _, y1, name = path.split('/')
    x1 = cv2.imread(path)
    
    y.append(y1)
    X.append(x1)

In [3]:
# Load lfw dataset
output = fetch_lfw_people(color=True, min_faces_per_person=25, resize=1)
lfw_X = output.images
lfw_y = output.target

lfw_names = output.target_names

In [4]:
clmv_dict = {}
for idx, val in enumerate(np.unique(y)):
    clmv_dict[val] = idx

In [5]:
names = np.concatenate([np.unique(y), lfw_names])

In [6]:
for idx, n in enumerate(names):
    names[idx] = n.title()

In [7]:
clmv_y = []
for n in y:
    clmv_y.append(clmv_dict[n])

In [8]:
combined_y = np.concatenate([clmv_y, lfw_y + len(np.unique(y))])

In [9]:
# Resize our X to right size
resizer = Resizer(lfw_X.shape[2], lfw_X.shape[1])

In [10]:
clmv_resized_X = []
for img in X:
    clmv_resized_X.append(resizer(img))

In [11]:
clmv_resized_X = np.array(clmv_resized_X)

In [12]:
final_X = np.concatenate([clmv_resized_X, lfw_X])

In [13]:
np.save('clmv_X.npy', final_X)
np.save('clmv_y.npy', combined_y)

In [14]:
np.save('clmv_names.npy', names)

In [15]:
X_yuv = []

rgb2yuv = FormatConverter(source='RGB', dest='YUV', flip_axis=True)
for x in final_X:
    X_yuv.append(rgb2yuv(x))
    
X_yuv = np.array(X_yuv)
X_yuv = X_yuv.reshape(len(X_yuv), 3, -1)

# Train a Classifier

In [231]:
X = X_yuv
y = combined_y

In [232]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline

In [233]:
FisherFaces = Pipeline([
    ('pca', MultiDimensionalModel(PCA(n_components=150), dimensions=3)),
    ('lda', LDA()),
    ('classify', SVC(kernel='linear', class_weight='balanced', probability=True))
])

In [234]:
# Create holdout
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [235]:
FisherFaces.fit(X_train, y_train)

Pipeline(steps=[('pca', MultiDimensionalModel(dimensions=None,
           models=[PCA(copy=True, iterated_power='auto', n_components=150, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False), PCA(copy=True, iterated_power='auto', n_components=150, random_state=None,
  svd_solver='auto', tol=0.0, wh...',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

In [236]:
from sklearn.metrics import accuracy_score
out = FisherFaces.predict(X_test)
accuracy_score(y_test, out)

0.79117647058823526

In [237]:
from sklearn.metrics import classification_report
print(classification_report(y_test, out, target_names=names))

                           precision    recall  f1-score   support

                     Brad       1.00      1.00      1.00         9
                    Piotr       1.00      1.00      1.00         5
                 Rhiannon       1.00      1.00      1.00         6
                   Sophie       1.00      1.00      1.00         7
                      Tom       1.00      1.00      1.00         5
         Alejandro Toledo       0.58      0.78      0.67         9
             Alvaro Uribe       0.67      0.57      0.62         7
             Andre Agassi       1.00      0.58      0.74        12
             Ariel Sharon       0.75      0.94      0.83        16
    Arnold Schwarzenegger       0.33      0.38      0.35         8
             Bill Clinton       0.75      0.43      0.55         7
             Colin Powell       0.76      0.94      0.84        65
            David Beckham       0.33      0.29      0.31         7
          Donald Rumsfeld       0.75      0.86      0.80     

# Now Test On Haar Cascades

In [238]:
%matplotlib inline
import matplotlib.pyplot as plt

In [134]:
# Run this through the Haar Cascade classify each output then, output if the correct classification is made
# Get each video from the dataset
video_locations = []
subjects = []
for v in glob('../walk_videos/*'):
    video_locations.append(v)
    
    video_name = v.split('/')[2]
    subject = video_name.split('_')[0]
    
    subjects.append(subject.title())

In [135]:
cascade = CascadeDetector('../recognition-pipeline/cascades/haarcascade_frontalface_default.xml')

In [138]:
video_frames = []
for path in video_locations:
    frames = []
    source = StaticSource(path, fps=25)
    
    print("Gather from: " + path)
    for f in source.__iter__():
        if f[0] == None:
            break
        frames.append(f[0])
        
    video_frames.append(frames)

Gather from: ../walk_videos/alen_1.mp4




Gather from: ../walk_videos/alen_2.mp4
Gather from: ../walk_videos/alen_3.mp4
Gather from: ../walk_videos/brad_1.mp4
Gather from: ../walk_videos/brad_2.mp4
Gather from: ../walk_videos/piotr_1.mp4
Gather from: ../walk_videos/rhiannon_1.mp4
Gather from: ../walk_videos/sophie_1.mp4
Gather from: ../walk_videos/sophie_2.mp4
Gather from: ../walk_videos/tom_1.mp4
Gather from: ../walk_videos/tom_2.mp4
Gather from: ../walk_videos/walk_all.mp4


In [157]:
subject_frames = []
video_frames = np.array(video_frames)

for video in video_frames:
    faces = []
    for frame in video:
        for _, x, y, w, h in cascade._detect(frame):
            faces.append(resizer(frame[y:y+h, x:x+w]))
    
    subject_frames.append(faces)

In [365]:
THRESHOLD = 0.50

In [366]:
decision = []
for faces in subject_frames:
    options = []
    for idx, face in enumerate(faces):
        new_face = rgb2yuv(face).reshape(1,3,-1)
        
        out = FisherFaces.predict_proba(new_face)
        if np.max(out) >= THRESHOLD:
            options.append((idx, names[np.argmax(out)]))
            
    decision.append(options)

In [367]:
decision

[[],
 [],
 [(38, 'Tom')],
 [(83, 'Brad'),
  (100, 'Brad'),
  (104, 'Brad'),
  (106, 'Brad'),
  (108, 'Brad'),
  (110, 'Brad'),
  (122, 'Brad')],
 [(23, 'Brad')],
 [],
 [(63, 'Vladimir Putin')],
 [],
 [],
 [(25, 'Tom'), (26, 'Tom'), (31, 'Tom')],
 [(41, 'Tom')],
 [(170, 'Tom'), (174, 'Tom'), (179, 'Tom'), (181, 'Tom')]]

In [361]:
len(names)

47