In [1]:
!unzip dataset.zip

Archive:  dataset.zip
  inflating: data/train/ben_afflek/httpcsvkmeuaeccjpg.jpg  
  inflating: data/train/ben_afflek/httpimagesfandangocomrImageRendererredesignstaticimgnoxportraitjpgpcpcpcimagesmasterrepositoryperformerimagespjpg.jpg  
  inflating: data/train/ben_afflek/httpssmediacacheakpinimgcomxdbbdbbbececacdecdcdfjpg.jpg  
  inflating: data/train/ben_afflek/httpssmediacacheakpinimgcomxdfdfadcfeabjpg.jpg  
  inflating: data/train/ben_afflek/httpssmediacacheakpinimgcomxedaedabcbefbcbabbjpg.jpg  
  inflating: data/train/ben_afflek/httpssmediacacheakpinimgcomxeebdfdbaaajpg.jpg  
  inflating: data/train/ben_afflek/httpssmediacacheakpinimgcomxeeedcacddeccccacfjpg.jpg  
  inflating: data/train/ben_afflek/httpsuploadwikimediaorgwikipediacommonsthumbddBenAffleckbyGageSkidmorejpgpxBenAffleckbyGageSkidmorejpg.jpg  
  inflating: data/train/ben_afflek/httptrwebimgacstanetcxbdddmediasnmediajpg.jpg  
  inflating: data/train/ben_afflek/httpwwwaceshowbizcomimagesphotobenaffleckjpg.jpg  
  inflatin

In [2]:
pip install mtcnn

Collecting mtcnn
  Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
[K     |████████████████████████████████| 2.3 MB 5.5 MB/s 
Installing collected packages: mtcnn
Successfully installed mtcnn-0.1.1


In [9]:
from os import listdir
import os
from PIL import Image
from numpy import asarray
from matplotlib import pyplot
from mtcnn.mtcnn import MTCNN
from numpy.lib.npyio import savez_compressed
import pickle

Detecting faces using MTCNN and extracting them

In [6]:
def extract_faces(filename, required_size=(160, 160)):
  image= Image.open(filename)
  image= image.convert("RGB")
  pixels= asarray(image)
  detector= MTCNN()
  results= detector.detect_faces(pixels)
  x1, y1, width, height= results[0]['box']
  x1, y1= abs(x1), abs(y1)
  x2, y2= x1+width, y1+height
  face= pixels[y1:y2, x1:x2]
  image= Image.fromarray(face)
  image= image.resize(required_size)
  face_array= asarray(image)
  return face_array

def load_faces(directory):
  faces= list()
  for filename in listdir(directory):
    path= directory+ filename
    face= extract_faces(path)
    faces.append(face)
  return faces 

def load_dataset(directory):
  X, y= list(), list()
  for subdir in listdir(directory):
    path= directory+ subdir+ '/'
    if not os.path.isdir(path):
      continue
    faces= load_faces(path)
    labels= [subdir for _ in range(len(faces))]
    print('>loaded %d examples for class: %s' %(len(faces), subdir))
    X.extend(faces)
    y.extend(labels)
  return asarray(X), asarray(y)

trainX, trainy= load_dataset('/content/train/')
print(trainX.shape, trainy.shape)

testX, testy= load_dataset('/content/val/')

savez_compressed('Celebrity faces dataset.npz', trainX, trainy, testX, testy)

>loaded 17 examples for class: elton_john
>loaded 22 examples for class: mindy_kaling
>loaded 14 examples for class: ben_afflek
>loaded 21 examples for class: jerry_seinfeld
>loaded 19 examples for class: madonna
(93, 160, 160, 3) (93,)
>loaded 5 examples for class: elton_john
>loaded 5 examples for class: mindy_kaling
>loaded 5 examples for class: ben_afflek
>loaded 5 examples for class: jerry_seinfeld
>loaded 5 examples for class: madonna


In [7]:
from numpy import load
from numpy import expand_dims
from numpy import asarray
from numpy import savez_compressed
from keras.models import load_model

Creating face embeddings using FaceNet

In [10]:
def get_embedding(model, face_pixels):
	face_pixels = face_pixels.astype('float32')
	mean, std = face_pixels.mean(), face_pixels.std()
	face_pixels = (face_pixels - mean) / std
	samples = expand_dims(face_pixels, axis=0)
	yhat = model.predict(samples)
	return yhat[0]

data= load('Celebrity faces dataset.npz')
trainX, trainy, testX, testy = data['arr_0'], data['arr_1'], data['arr_2'], data['arr_3']
print('Loaded: ', trainX.shape, trainy.shape, testX.shape, testy.shape)
model = load_model('facenet_keras.h5')
print('Loaded Model')

Loaded:  (93, 160, 160, 3) (93,) (25, 160, 160, 3) (25,)
Loaded Model


In [12]:
newTrainX = list()
for face_pixels in trainX:
	embedding = get_embedding(model, face_pixels)
	newTrainX.append(embedding)
newTrainX = asarray(newTrainX)
print(newTrainX.shape)

newTestX = list()
for face_pixels in testX:
	embedding = get_embedding(model, face_pixels)
	newTestX.append(embedding)
newTestX = asarray(newTestX)
print(newTestX.shape)

savez_compressed('Celebrity-faces-embeddings.npz', newTrainX, trainy, newTestX, testy)

(93, 128)
(25, 128)


Face Classification using SVC

In [14]:
data = load('Celebrity-faces-embeddings.npz')
trainX, trainy, testX, testy = data['arr_0'], data['arr_1'], data['arr_2'], data['arr_3']
print('Dataset: train=%d, test=%d' % (trainX.shape[0], testX.shape[0]))

Dataset: train=93, test=25


In [21]:
from numpy import load
from numpy import expand_dims
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [17]:
in_encoder = Normalizer(norm='l2')
trainX = in_encoder.transform(trainX)
testX = in_encoder.transform(testX)
out_encoder = LabelEncoder()
out_encoder.fit(trainy)
trainy = out_encoder.transform(trainy)
testy = out_encoder.transform(testy)

In [19]:
model = SVC(kernel='linear', probability=True)
model.fit(trainX, trainy)

pickle.dump(model, open('model.pickle', 'wb'))

In [22]:
yhat_train = model.predict(trainX)
yhat_test = model.predict(testX)

score_train = accuracy_score(trainy, yhat_train)
score_test = accuracy_score(testy, yhat_test)

print('Accuracy: train=%.2f, test=%.2f' % (score_train*100, score_test*100))

Accuracy: train=100.00, test=100.00


Testing model on random examples from test dataset

In [25]:
from random import choice
selection = choice([i for i in range(testX.shape[0])])
random_face_pixels = testX[selection]
random_face_emb = testX[selection]
random_face_class = testy[selection]
random_face_name = out_encoder.inverse_transform([random_face_class])

samples = expand_dims(random_face_emb, axis=0)
yhat_class = model.predict(samples)
yhat_prob = model.predict_proba(samples)

class_index = yhat_class[0]
class_probability = yhat_prob[0,class_index] * 100
predict_names = out_encoder.inverse_transform(yhat_class)
print('Predicted: %s (%.3f)' % (predict_names[0], class_probability))
print('Expected: %s' % random_face_name[0])

Predicted: elton_john (91.397)
Expected: elton_john
