In [4]:
import pandas as pd
import numpy as np
import os
import glob
from tqdm import tqdm
import seaborn as sns

import torch
import cv2
import matplotlib.pyplot as plt
from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face
device = 'cuda:1' if torch.cuda.is_available() else 'cpu'
from pathlib import Path
from PIL import Image

In [5]:
margin =20
image_size = 160

# Load face detector
mtcnn = MTCNN(keep_all=False, select_largest=False, post_process=False,
              device=device,margin=margin, image_size=image_size,thresholds=[0.7, 0.7, 0.7])

# Load facial recognition model
resnet = InceptionResnetV1(pretrained='casia-webface', device=device).eval()

In [6]:
# extract a single face from a given photograph
def extract_face(filename, required_size=(160, 160)):
    # load image from file
    image = Image.open(filename)
    # convert to RGB, if needed
    image = image.convert('RGB')
    # convert to array
    pixels = np.asarray(image)
    # detect faces in the image
    face_array = mtcnn(pixels)
    if face_array==None:
        return None
    return face_array.permute(1,2,0).int().numpy()


In [27]:
image = Image.open("/home/server/Shahid/Datasets/Bollywood_celeb_dataset_100_classes/bollywood_celeb_faces_0/Aamir_Khan/5.jpg")
image.size

(670, 529)

In [28]:
def load_face(dir):
    faces = list()
    # enumerate files
    for filename in os.listdir(dir):
        path = dir + '/'+ filename
        if (".jpg" not in filename) and (".png" not in filename):
            continue
        face = extract_face(path)
        if face is None:
            continue
        faces.append(face) 
    return faces

def load_dataset(dir):
    # list for faces and labels
    X, y = list(), list()
    for subdir in os.listdir(dir):
        path =dir +'/'+subdir
        faces = load_face(path)
        labels = [subdir for i in range(len(faces))]
        print("loaded %d sample for class: %s" % (len(faces),subdir) ) # print progress
        X.extend(faces)
        y.extend(labels)
    return np.asarray(X), np.asarray(y)


# load train dataset
trainX1, trainy1 = load_dataset('/home/server/Shahid/Datasets/Bollywood_celeb_dataset_100_classes/bollywood_celeb_faces_0')
print(trainX1.shape, trainy1.shape)

trainX2, trainy2 = load_dataset('/home/server/Shahid/Datasets/Bollywood_celeb_dataset_100_classes/bollywood_celeb_faces_1')
print(trainX2.shape, trainy2.shape)

trainX3, trainy3 = load_dataset('/home/server/Shahid/Datasets/Bollywood_celeb_dataset_100_classes/bollywood_celeb_faces2')
print(trainX3.shape, trainy3.shape)

# save and compress the dataset for further use
#np.savez_compressed('5-celebrity-faces-dataset.npz', trainX, trainy, testX, testy)

loaded 74 sample for class: Bobby_Deol
loaded 83 sample for class: Akshaye_Khanna
loaded 114 sample for class: Ameesha_Patel
loaded 121 sample for class: Bipasha_Basu
loaded 97 sample for class: Anil_Kapoor
loaded 110 sample for class: Ajay_Devgn
loaded 97 sample for class: Arjun_Rampal
loaded 144 sample for class: Ileana_D+У+З+ЦCruz
loaded 110 sample for class: Aftab_Shivdasani
loaded 128 sample for class: Huma_Qureshi
loaded 96 sample for class: Bhumi_Pednekar
loaded 157 sample for class: Anushka_Sharma
loaded 144 sample for class: Anushka_Shetty
loaded 113 sample for class: Ayushmann_Khurrana
loaded 134 sample for class: Amitabh_Bachchan
loaded 147 sample for class: Asin
loaded 122 sample for class: Amy_Jackson
loaded 75 sample for class: Abhay_Deol
loaded 121 sample for class: Amrita_Rao
loaded 77 sample for class: Govinda
loaded 146 sample for class: Disha_Patani
loaded 156 sample for class: Aishwarya_Rai
loaded 103 sample for class: Abhishek_Bachchan




loaded 105 sample for class: Arshad_Warsi
loaded 111 sample for class: Arjun_Kapoor
loaded 133 sample for class: Aamir_Khan
loaded 95 sample for class: Emraan_Hashmi
loaded 79 sample for class: Farhan_Akhtar
loaded 193 sample for class: Deepika_Padukone
loaded 110 sample for class: Esha_Gupta




loaded 115 sample for class: Hrithik_Roshan
loaded 164 sample for class: Alia_Bhatt
loaded 122 sample for class: Akshay_Kumar
(3896, 160, 160, 3) (3896,)
loaded 126 sample for class: Parineeti_Chopra
loaded 124 sample for class: Karisma_Kapoor
loaded 134 sample for class: Kriti_Kharbanda
loaded 156 sample for class: Katrina_Kaif
loaded 143 sample for class: Madhuri_Dixit
loaded 115 sample for class: Juhi_Chawla
loaded 137 sample for class: Kajol
loaded 130 sample for class: Kangana_Ranaut
loaded 118 sample for class: Ranbir_Kapoor
loaded 136 sample for class: Priyanka_Chopra
loaded 150 sample for class: Pooja_Hegde
loaded 131 sample for class: Kiara_Advani
loaded 138 sample for class: Prabhas
loaded 133 sample for class: Preity_Zinta
loaded 119 sample for class: John_Abraham
loaded 159 sample for class: Jacqueline_Fernandez
loaded 117 sample for class: Irrfan_Khan
loaded 123 sample for class: Nargis_Fakhri
loaded 86 sample for class: Lara_Dutta
loaded 82 sample for class: Paresh_Rawal




loaded 144 sample for class: Vidya_Balan
loaded 100 sample for class: Vicky_Kaushal
loaded 117 sample for class: Richa_Chadda
loaded 99 sample for class: Saif_Ali_Khan
loaded 129 sample for class: Shruti_Haasan
loaded 114 sample for class: Shraddha_Kapoor
loaded 140 sample for class: Shilpa_Shetty
loaded 80 sample for class: Vivek_Oberoi
loaded 139 sample for class: Zareen_Khan
loaded 152 sample for class: Vaani_Kapoor
loaded 111 sample for class: Sara_Ali_Khan
loaded 118 sample for class: Shah_Rukh_Khan
loaded 81 sample for class: Sunny_Deol
loaded 99 sample for class: Sidharth_Malhotra
loaded 82 sample for class: Riteish_Deshmukh
loaded 116 sample for class: Tabu
loaded 124 sample for class: Ranveer_Singh
loaded 176 sample for class: Tamannaah_Bhatia
loaded 139 sample for class: Taapsee_Pannu
loaded 109 sample for class: Randeep_Hooda
loaded 141 sample for class: Shahid_Kapoor
loaded 72 sample for class: Uday_Chopra
loaded 71 sample for class: Suniel_Shetty
loaded 140 sample for clas

In [31]:
trainX=np.concatenate((trainX1,trainX2,trainX3), axis=0)
trainy=np.concatenate((trainy1,trainy2,trainy3), axis=0)

In [33]:
trainy.shape

(11803,)

In [2]:
np.savez_compressed('100_Bollywood_celebrity_faces_dataset.npz',trainX,trainy)

NameError: name 'trainX' is not defined

In [7]:
# load the face dataset
data = np.load('100_Bollywood_celebrity_faces_dataset.npz')
trainX, trainy = data['arr_0'], data['arr_1']
print('Loaded: ', trainX.shape, trainy.shape)

Loaded:  (11803, 160, 160, 3) (11803,)


In [8]:
trainy.shape

(11803,)

In [9]:
from sklearn.model_selection import train_test_split

trainX,testX,trainy,testy=train_test_split(trainX,trainy, test_size=0.33, random_state=42,stratify=trainy)

KeyboardInterrupt: 

In [None]:
from torchvision.transforms import ToTensor
tf_img = lambda i: ToTensor()(i).unsqueeze(0).float()
embeddings = lambda input: resnet(input)

In [None]:
# convert each face in the train set into embedding
emdTrainX = list()
for face in trainX:
    #mean, std = face.mean(), face.std()
    #face = (face-mean)/std
    face=face/255
    t = tf_img(face).to(device)
    emd = embeddings(t).squeeze().cpu().tolist()
    emdTrainX.append(emd)
    #print(len(emd))
    
emdTrainX = np.asarray(emdTrainX)
print(emdTrainX.shape)

# convert each face in the test set into embedding
emdTestX = list()
for face in testX:
    mean, std = face.mean(), face.std()
    #face = (face-mean)/std
    face=face/255
    t = tf_img(face).to(device)
    emd = embeddings(t).squeeze().cpu().tolist()
    emdTestX.append(emd)
emdTestX = np.asarray(emdTestX)
print(emdTestX.shape)

# save arrays to one file in compressed format
np.savez_compressed('100-Bollywood_celebrity-faces-embeddings.npz', emdTrainX, trainy, emdTestX, testy)

In [None]:
data=np.load("100-Bollywood_celebrity-faces-embeddings.npz")

In [None]:
emdTrainX, trainy, emdTestX, testy=data["arr_0"],data["arr_1"],data["arr_2"],data["arr_3"]

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC

print("Dataset: train=%d, test=%d" % (emdTrainX.shape[0], emdTestX.shape[0]))
# normalize input vectors
in_encoder = Normalizer()
emdTrainX_norm=emdTrainX#no effect of normalizer
emdTestX_norm=emdTestX
#emdTrainX_norm = in_encoder.transform(emdTrainX)
#emdTestX_norm = in_encoder.transform(emdTestX)
# label encode targets
out_encoder = LabelEncoder()
out_encoder.fit(trainy)
trainy_enc = out_encoder.transform(trainy)
testy_enc = out_encoder.transform(testy)
# fit model
model = SVC(kernel='linear', probability=True)
model.fit(emdTrainX_norm, trainy_enc)
# predict
yhat_train = model.predict(emdTrainX_norm)
yhat_test = model.predict(emdTestX_norm)
# score
score_train = accuracy_score(trainy_enc, yhat_train)
score_test = accuracy_score(testy_enc, yhat_test)
# summarize
print('Accuracy: train=%.3f, test=%.3f' % (score_train*100, score_test*100))

In [None]:
from random import choice
# select a random face from test set
selection = choice([i for i in range(testX.shape[0])])
random_face = testX[selection]
random_face_emd = emdTestX_norm[selection]
random_face_class = testy_enc[selection]
random_face_name = out_encoder.inverse_transform([random_face_class])

# prediction for the face
samples = np.expand_dims(random_face_emd, axis=0)
yhat_class = model.predict(samples)
yhat_prob = model.predict_proba(samples)
# get name
class_index = yhat_class[0]
class_probability = yhat_prob[0,class_index] * 100
predict_names = out_encoder.inverse_transform(yhat_class)
all_names = out_encoder.inverse_transform(list(np.unique(trainy_enc)))
#print('Predicted: %s (%.3f)' % (predict_names[0], class_probability))
#print('Predicted: \n%s \n%s' % (all_names, yhat_prob[0]*100))
print('Expected: %s' % random_face_name[0])
print("predicted:",predict_names[0])
# plot face
plt.imshow(random_face)
title = '%s (%.3f)' % (predict_names[0], class_probability)
plt.title(title)
plt.show()