In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
from scipy.spatial import distance
from matplotlib import pyplot as plt
from keras.models import load_model
from PIL import Image
from sklearn.model_selection import train_test_split
import os

In [2]:
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC
import pickle


In [3]:
#loading haarcascade_frontalface_default.xml
face_model = cv2.CascadeClassifier('../input/haarcascades/haarcascade_frontalface_default.xml')

In [4]:
# extract a single face from a given photograph

def extract_face(filename, required_size=(160, 160)):
    # load image from file
    image = Image.open(filename)
    
    # convert to RGB, if needed
    image = image.convert('RGB')
    
    # convert to array
    pixels = np.asarray(image)
    
    # resize pixels to the model size
    image = Image.fromarray(pixels)
    image = image.resize(required_size)
    face_array = np.asarray(image)
    
    return face_array

In [5]:
def load_face(dir):
    faces = list()
    paths = list()
    
    # enumerate files
    for filename in os.listdir(dir):
        path = dir + filename
        face = extract_face(path)
        faces.append(face)
        paths.append(path)
    
    return faces, paths

def load_dataset(dir):
    # list for faces and labels
    X, y, paths, t_paths = list(), list(), list(), list()
    
    for subdir in os.listdir(dir):
        path = dir + subdir + '/'
        faces, paths = load_face(path)
        t_paths = t_paths + paths
        labels = [subdir for i in range(len(faces))]
        X.extend(faces)
        y.extend(labels)
        
    return np.asarray(X), np.asarray(y), t_paths

In [6]:
# load train dataset

trainX, trainy, train_paths = load_dataset('C:/Users/admin/Prefinal Project/Dataset Prefinal/Training/')
print(trainX.shape, trainy.shape)

UnidentifiedImageError: cannot identify image file 'C:/Users/admin/Prefinal Project/Dataset Prefinal/Training/Kavya/IMG_6680.jpg'

In [None]:
# load test dataset

testX, testy, test_paths = load_dataset('C:/Users/admin/Prefinal Project/Dataset Prefinal/Testing/')
print(testX.shape, testy.shape, len(test_paths))

In [None]:
# save and compress the dataset for further use
np.savez_compressed('new_modified_masked_face.npz', trainX, trainy, testX, testy)
print('Done with Compression')
#del trainX, trainy, testX, testy

In [None]:
data = np.load('new_modified_masked_face.npz')
trainX, trainy, testX, testy = data['arr_0'], data['arr_1'], data['arr_2'], data['arr_3']
print('Loaded: ', trainX.shape, trainy.shape, testX.shape, testy.shape)
del data

In [None]:
trainx, valid = train_test_split(trainX, test_size=0.4, random_state=42, shuffle=True)
del trainX
print('Split Training Data')

In [None]:
print("number of image in train dataset : %s" %(len(trainx)))

print("number of image in train dataset : %s" %(len(valid)))

In [None]:
y_train, y_valid = train_test_split(trainy, test_size=0.4, random_state=42, shuffle=True)
del trainy
print('Split Training Data')

In [None]:
print("number of image in train dataset : %s" %(len(y_train)))

print("number of image in train dataset : %s" %(len(y_valid)))

In [None]:
# save and compress the dataset for further use
np.savez_compressed('modified_extracted_masked_unmasked.npz', trainx, y_train, valid, y_valid,testX, testy)
del trainx, y_train, valid, y_valid, testX, testy
print('Done Compressing')

In [None]:
data = np.load('modified_extracted_masked_unmasked.npz')
print(data.files)
trainx, y_train, valid, y_valid,testX, testy = data['arr_0'], data['arr_1'], data['arr_2'], data['arr_3'], data['arr_4'], data['arr_5']
print('Loaded: ', trainx.shape, y_train.shape, valid.shape, y_valid.shape,testX.shape, testy.shape)
del data

In [None]:
facenet_model = load_model('keras-facenet/model/facenet_keras.h5')
print('Loaded Model')

In [None]:
print('Loaded: ', trainx.shape, y_train.shape, valid.shape, y_valid.shape,testX.shape, testy.shape)

In [None]:
def get_embedding(model, face):
    # scale pixel values
    face = face.astype('float32')
    
    # standardization
    mean, std = face.mean(), face.std()
    face = (face-mean)/std
    
    # transfer face into one sample (3 dimension to 4 dimension)
    sample = np.expand_dims(face, axis=0)
    
    # make prediction to get embedding
    yhat = model.predict(sample)
    
    return yhat[0]

In [None]:
emdTrainX = list()

for face in trainx:
    emd = get_embedding(facenet_model, face)
    emdTrainX.append(emd)
    
emdTrainX = np.asarray(emdTrainX)
print(emdTrainX.shape)

embValid = list()

for face in valid:
    emd = get_embedding(facenet_model,face)
    embValid.append(emd)
    
embValid = np.asarray(embValid)
print(embValid.shape)

In [None]:
emdTestX = list()

for face in testX:
    emd = get_embedding(facenet_model, face)
    emdTestX.append(emd)
    
emdTestX = np.asarray(emdTestX)
print(emdTestX.shape)

In [None]:
# save arrays to one file in compressed format
np.savez_compressed('embeddings_masked.npz', emdTrainX, y_train, embValid, y_valid, emdTestX, testy)
del emdTrainX, y_train, embValid, y_valid, emdTestX, testy
print('Here')

In [None]:
data = np.load('embeddings_masked.npz')
print(data.files)
emdTrainX, y_train, embValid, y_valid, emdTestX, testy = data['arr_0'], data['arr_1'], data['arr_2'], data['arr_3'], data['arr_4'], data['arr_5']

In [None]:
print('Loaded: ', emdTrainX.shape, y_train.shape, embValid.shape, y_valid.shape, emdTestX.shape, testy.shape)

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC
import pickle

print("Dataset: train=%d,validation = %d, test=%d" % (emdTrainX.shape[0],embValid.shape[0] ,emdTestX.shape[0]))

# normalize input vectors
in_encoder = Normalizer(norm='l2')
emdTrainX_norm = in_encoder.transform(emdTrainX)
embValid_norm = in_encoder.transform(embValid)
emdTestX_norm = in_encoder.transform(emdTestX)

# label encode targets
out_encoder = LabelEncoder()
encoder_arr = np.append (y_train, 'wangnan')
out_encoder.fit(encoder_arr)

In [None]:
trainy_enc = out_encoder.transform(y_train)
y_valid_enc = out_encoder.transform(y_valid)
testy_enc = out_encoder.transform(testy)

In [None]:
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()
model = model.fit(emdTrainX_norm,trainy_enc)
yhat = model.predict(emdTestX_norm)
yhat_valid = model.predict(embValid_norm)
score_test = accuracy_score(testy_enc, yhat)
score_valid = accuracy_score(y_valid_enc, yhat_valid)
print('Accuracy: test=%.3f' % (score_test*100))
print('Validation accuracy=%.3f' % (score_valid*100))

In [None]:
from xgboost import XGBClassifier
model = XGBClassifier()
model = model.fit(emdTrainX_norm,trainy_enc)
yhat = model.predict(emdTestX_norm)
score_test = accuracy_score(testy_enc, yhat)
print('Accuracy: test=%.3f' % (score_test*100))

In [None]:
model = SVC(kernel='linear', probability=True)
model.fit(emdTrainX_norm, trainy_enc)

In [None]:
# predict
yhat_valid = model.predict(embValid_norm)
yhat_test = model.predict(emdTestX_norm)

# score
score_valid = accuracy_score(y_valid_enc, yhat_valid)
score_test = accuracy_score(testy_enc, yhat_test)

# summarize
print('Accuracy: train=%.3f, test=%.3f' % (score_valid*100, score_test*100))

In [None]:
from sklearn.linear_model import LogisticRegression



lr = LogisticRegression(max_iter=1000)
lr.fit(emdTrainX_norm, trainy_enc)
score_test = accuracy_score(testy_enc, yhat_test)



print('Accuracy: test=%.3f' % (score_test*100))


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rf = RandomForestClassifier(n_estimators = 400, random_state = 1)
rf.fit(emdTrainX_norm, trainy_enc)
score_test = accuracy_score(testy_enc, yhat_test)
print('Accuracy: test=%.3f' % (score_test*100))

In [None]:
#Save the model
filename = 'linear.pkl'
pickle.dump(model, open(filename, 'wb'))

In [None]:
loaded_model = pickle.load(open('linear.pkl', 'rb'))

In [None]:
from random import choice

for i in range(50):
    
    # select a random face from test set
    selection = choice([i for i in range(testX.shape[0])]) 
    random_face = testX[selection]
    random_face_emd = emdTestX_norm[selection]
    random_face_class = testy_enc[selection]
    random_face_name = out_encoder.inverse_transform([random_face_class])
    
    # prediction for the face
    samples = np.expand_dims(random_face_emd, axis=0)
    yhat_class = loaded_model.predict(samples)
    yhat_prob = loaded_model.predict_proba(samples)
    class_index = yhat_class[0]
    
    if class_index <= 10:
        # get name
        class_probability = yhat_prob[0,class_index] * 100
        predict_names = out_encoder.inverse_transform(yhat_class)
        
        if random_face_name[0] == predict_names[0]:
            print('Predicted: %s (%.3f)' % (predict_names[0], class_probability))
            print('Expected: %s' % random_face_name[0])
            
            # plot face
            plt.imshow(random_face)
            title = '%s (%.3f)' % (predict_names[0], class_probability)
            plt.title(title)
            plt.show()