In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense,Convolution2D,MaxPooling2D,Flatten,Conv2D,Dropout
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt

# Using mtcnn to detect faces - then facenet-keras to get a face embedding - finally using decision tree classifier on the produced dataset

In [4]:
# Sanity check: open one raw file from the Yale face database with PIL to
# confirm the dataset path is reachable and the file can be read as an image.

im=Image.open('../input/yale-face-database/subject09.leftlight')

# A bare expression on the last line of a cell makes the notebook display it.
im

In [None]:
# # loading the Keras FaceNet model
# from keras.models import load_model
# # load the model
# model = load_model('../input/facenet-keras/facenet_keras.h5')
# # summarize input and output shape
# print(model.inputs)
# print(model.outputs)

In [5]:
!pip install mtcnn

In [6]:
# used to detect a face
import mtcnn
# print version
print(mtcnn.__version__)

In [8]:
# function for face detection with mtcnn
from numpy import asarray
from mtcnn.mtcnn import MTCNN

# extract a single face from a given photograph
# Lazily created MTCNN detector shared by every extract_face() call, so the
# detector is not rebuilt for each image that is processed.
_SHARED_FACE_DETECTOR = None


def extract_face(filename, required_size=(160, 160), detector=None):
    """Detect the first face in an image file and return it as a resized RGB array.

    Args:
        filename: Path of the image file to open with PIL.
        required_size: Size tuple handed to ``Image.resize`` for the cropped face.
        detector: Optional object exposing ``detect_faces(pixels)``. When it is
            omitted, one ``MTCNN()`` with default weights is created on first
            use and reused by later calls.

    Returns:
        The array of the resized crop taken from the first reported detection.

    Raises:
        ValueError: If the detector reports no face for the image.
    """
    global _SHARED_FACE_DETECTOR
    if detector is None:
        if _SHARED_FACE_DETECTOR is None:
            _SHARED_FACE_DETECTOR = MTCNN()
        detector = _SHARED_FACE_DETECTOR

    # Close the file handle as soon as the pixel data has been copied out.
    with Image.open(filename) as image:
        pixels = asarray(image.convert('RGB'))

    results = detector.detect_faces(pixels)
    if not results:
        raise ValueError('no face detected in %r' % (filename,))

    # Only the first detection is used.
    x1, y1, width, height = results[0]['box']
    # Compute the far edge from the raw origin, then clamp the origin to the
    # image. A negative origin means the box starts outside the picture;
    # taking abs() of it would shift the whole crop instead of trimming it.
    x2, y2 = x1 + width, y1 + height
    x1, y1 = max(x1, 0), max(y1, 0)
    face = pixels[y1:y2, x1:x2]

    # Resize the crop to the size expected by the embedding model.
    resized = Image.fromarray(face).resize(required_size)
    return asarray(resized)

In [9]:
from os import listdir
# load images and extract faces for a given subject
def load_faces(subject, folder='../input/yale-face-database/'):
    """Extract the face from every image file belonging to one subject.

    A file belongs to the subject when the part of its name before the first
    dot equals ``'subject' + subject``.

    Args:
        subject: Subject identifier as it appears in the file names, e.g. ``'01'``.
        folder: Directory that holds the image files.

    Returns:
        A list with one ``extract_face`` result per matching file, ordered by
        file name.
    """
    prefix = 'subject' + subject
    # os.listdir returns entries in arbitrary order; sorting keeps the result,
    # and any positional train/test split made from it, reproducible.
    return [
        extract_face(os.path.join(folder, filename))
        for filename in sorted(listdir(folder))
        if filename.split(".")[0] == prefix
    ]

In [10]:
# loads data and returns testing and training lists
def load_dataset(subjects=None, train_per_subject=7):
    """Load the faces of every subject and split them into train and test sets.

    Args:
        subjects: Iterable of subject identifiers accepted by ``load_faces``.
            Defaults to the fifteen identifiers ``'01'`` .. ``'15'``.
        train_per_subject: Number of leading faces of each subject that go to
            the training set; the rest of that subject's faces go to the test
            set.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` as numpy arrays. Labels are the
        subject identifiers.
    """
    if subjects is None:
        subjects = ['%02d' % number for number in range(1, 16)]

    X_train, y_train, X_test, y_test = [], [], [], []
    for subject in subjects:
        faces = load_faces(subject)
        labels = [subject] * len(faces)
        # summarize progress
        print('>loaded %d examples for class: %s' % (len(faces), subject))

        X_train.extend(faces[:train_per_subject])
        y_train.extend(labels[:train_per_subject])
        X_test.extend(faces[train_per_subject:])
        y_test.extend(labels[train_per_subject:])

    return asarray(X_train), asarray(y_train), asarray(X_test), asarray(y_test)

In [11]:
from os.path import isdir
from numpy import expand_dims
from numpy import asarray
from numpy import savez_compressed
# Run face extraction for every subject and keep the four resulting arrays.
trainX, trainy, testX, testy = load_dataset()
print(trainX.shape, trainy.shape)
# Cache the arrays in one compressed file. They are passed positionally, so
# numpy stores them under the keys arr_0 .. arr_3 in exactly this order; the
# cell that reloads the file reads them back by those keys.
savez_compressed('./yale-face-database.npz', trainX, trainy, testX, testy)

In [12]:
# Report the shape of each split, in train-then-test order.
for split_array in (trainX, trainy, testX, testy):
    print(split_array.shape)

In [13]:
# pre computing face embeddings
from numpy import load
from numpy import expand_dims
from numpy import asarray
from numpy import savez_compressed
from keras.models import load_model
 
# get the face embedding for one face
def get_embedding(model, face_pixels):
    """Return the model's output for a single face image.

    The pixels are cast to float32 and standardised with the mean and standard
    deviation taken over the whole array, wrapped into a batch of one sample
    and passed to ``model.predict``.

    Args:
        model: Object exposing ``predict(samples)``.
        face_pixels: Array holding the pixels of one face.

    Returns:
        The first (and only) row of the prediction.
    """
    as_float = face_pixels.astype('float32')
    centre = as_float.mean()
    spread = as_float.std()
    standardized = (as_float - centre) / spread
    # The model works on batches, so add a leading sample axis.
    batch = expand_dims(standardized, axis=0)
    return model.predict(batch)[0]

In [14]:
# Reload the cropped faces that were cached on disk.
data = load('./yale-face-database.npz')
trainX, trainy, testX, testy = (data[key] for key in ('arr_0', 'arr_1', 'arr_2', 'arr_3'))
print('Loaded: ', trainX.shape, trainy.shape, testX.shape, testy.shape)

# Load the pre-trained FaceNet model.
model = load_model('../input/facenet-keras/facenet_keras.h5')
print('Loaded Model')

# Replace every training face by its embedding.
newTrainX = asarray([get_embedding(model, face) for face in trainX])
print(newTrainX.shape)

# Same conversion for the test faces.
newTestX = asarray([get_embedding(model, face) for face in testX])
print(newTestX.shape)

# Store embeddings and labels together; the positional order fixes the
# arr_0 .. arr_3 keys used when the file is read back.
savez_compressed('./yale-faces-embeddings.npz', newTrainX, trainy, newTestX, testy)

In [15]:
# perform classification
from random import choice
from numpy import load
from numpy import expand_dims
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC
from matplotlib import pyplot
# load faces
# data = load('./yale-face-database.npz')
# testX_faces = data['arr_2']

# Read the cached embeddings and their labels.
data = load('./yale-faces-embeddings.npz')
trainX = data['arr_0']
trainy = data['arr_1']
testX = data['arr_2']
testy = data['arr_3']

# Scale every embedding vector to unit L2 length.
in_encoder = Normalizer(norm='l2')
trainX, testX = in_encoder.transform(trainX), in_encoder.transform(testX)

# Map the subject labels to integers; the mapping is learned from the
# training labels only and then applied to the test labels.
out_encoder = LabelEncoder()
trainy = out_encoder.fit_transform(trainy)
testy = out_encoder.transform(testy)

In [16]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the normalised embeddings and score it on the test split.
DTC = DecisionTreeClassifier(random_state=0)
DTC.fit(trainX, trainy)
y_pred = DTC.predict(testX)

dtc_accuracy = accuracy_score(testy, y_pred)
dtc_report = classification_report(testy, y_pred)
print("accuracy score:{:.2f}".format(dtc_accuracy))
print("Classification Results:\n{}".format(dtc_report))

# Eigen Faces - then KNN

In [17]:
from os import listdir
import numpy as np
import matplotlib.pyplot as plt

# load images and extract faces for a given subject - updated version from the last model
def load_faces2(subject, folder='../input/yale-face-database/'):
    """Read every image file belonging to one subject without face detection.

    A file belongs to the subject when the part of its name before the first
    dot equals ``'subject' + subject``.

    Args:
        subject: Subject identifier as it appears in the file names, e.g. ``'01'``.
        folder: Directory that holds the image files.

    Returns:
        A list with one ``plt.imread`` result per matching file, ordered by
        file name.
    """
    prefix = 'subject' + subject
    # os.listdir returns entries in arbitrary order; sorting keeps the result,
    # and any positional train/test split made from it, reproducible.
    return [
        plt.imread(os.path.join(folder, filename))
        for filename in sorted(listdir(folder))
        if filename.split(".")[0] == prefix
    ]

In [18]:
# updated version from the last model
def load_dataset2(subjects=None, train_per_subject=8):
    """Load the raw images of every subject and split them into train and test sets.

    Args:
        subjects: Iterable of subject identifiers accepted by ``load_faces2``.
            Defaults to the fifteen identifiers ``'01'`` .. ``'15'``.
        train_per_subject: Number of leading images of each subject that go to
            the training set; the rest of that subject's images go to the test
            set.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` as numpy arrays. Labels are the
        subject identifiers.
    """
    if subjects is None:
        subjects = ['%02d' % number for number in range(1, 16)]

    X_train, y_train, X_test, y_test = [], [], [], []
    for subject in subjects:
        faces = load_faces2(subject)
        labels = [subject] * len(faces)
        # summarize progress
        print('>loaded %d examples for class: %s' % (len(faces), subject))

        X_train.extend(faces[:train_per_subject])
        y_train.extend(labels[:train_per_subject])
        X_test.extend(faces[train_per_subject:])
        y_test.extend(labels[train_per_subject:])

    return asarray(X_train), asarray(y_train), asarray(X_test), asarray(y_test)

In [19]:
# Load the un-cropped images for the eigenfaces model.
trainX1, trainy, testX1, testy = load_dataset2()
# Print the arrays that were just loaded. `trainX` / `testX` still hold the
# embeddings of the previous model at this point, so printing them here would
# report the wrong shapes.
print(trainX1.shape)
print(trainy.shape)
print(testX1.shape)
print(testy.shape)

In [20]:
# Flatten every image into one row of pixels, giving a 2-D
# (samples, pixels) matrix for the estimators used below.
train_pixel_count = trainX1.shape[1] * trainX1.shape[2]
trainX = trainX1.reshape((trainX1.shape[0], train_pixel_count))
print("New trainX shape:",trainX.shape)

test_pixel_count = testX1.shape[1] * testX1.shape[2]
testX = testX1.reshape((testX1.shape[0], test_pixel_count))
print("New testX shape:",testX.shape)



In [21]:
# Plot the variance captured by each principal component to help choose a
# value for n_components.

from sklearn.decomposition import PCA
pca=PCA()
pca.fit(trainX)

fig, ax = plt.subplots(figsize=(12,8))
ax.plot(pca.explained_variance_, linewidth=2)
ax.set_xlabel('Components')
ax.set_ylabel('Explained Variances')
plt.show()

In [22]:
# Reading the explained-variance plot above: the curve levels off at roughly
# 40 components, so components beyond that point add little new information.
# Keep the first 40.
from sklearn.decomposition import PCA
pca=PCA(n_components=40, whiten=True)
pca.fit(trainX)

In [23]:
# The average face of the training set: PCA's per-pixel mean, reshaped back
# to image form.

fig,ax=plt.subplots(1,1,figsize=(8,8))
# Take the image height and width from the loaded data rather than
# hard-coding them, as the eigenface plot in the next cell also does.
ax.imshow(pca.mean_.reshape(trainX1.shape[1:3]), cmap="gray")
ax.set_xticks([])
ax.set_yticks([])
ax.set_title('Average Face')

In [24]:
# Show every principal component reshaped back into an image ("eigenface").

number_of_eigenfaces=len(pca.components_)
eigen_faces=pca.components_.reshape((number_of_eigenfaces, trainX1.shape[1], trainX1.shape[2]))

cols=10
# Round up so a component count that is not a multiple of `cols` still gets
# an axis for every eigenface.
rows=int(np.ceil(number_of_eigenfaces/cols))
# squeeze=False keeps a 2-D axes array even when there is a single row.
fig, axarr=plt.subplots(nrows=rows, ncols=cols, figsize=(15,15), squeeze=False)
axarr=axarr.flatten()
for i in range(number_of_eigenfaces):
    axarr[i].imshow(eigen_faces[i],cmap="gray")
    axarr[i].set_xticks([])
    axarr[i].set_yticks([])
    axarr[i].set_title("eigen id:{}".format(i))
# Hide the axes left over in the last row.
for spare_axis in axarr[number_of_eigenfaces:]:
    spare_axis.axis('off')
plt.suptitle("All Eigen Faces")

In [25]:
# Project both splits onto the principal components fitted on the training data.
X_train_pca, X_test_pca = (pca.transform(split) for split in (trainX, testX))

In [26]:
from sklearn.neighbors import KNeighborsClassifier

# 1-nearest-neighbour classifier on the PCA projections.
KNN = KNeighborsClassifier(n_neighbors=1)
KNN.fit(X_train_pca, trainy)
y_pred = KNN.predict(X_test_pca)

knn_accuracy = accuracy_score(testy, y_pred)
knn_report = classification_report(testy, y_pred)
print("accuracy score:{:.2f}".format(knn_accuracy))
print("Classification Results:\n{}".format(knn_report))

#  Neural Network Model

In [27]:
# used for face detection
!pip install mtcnn

In [28]:
import os

In [29]:
# Imports
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import cv2
from PIL import Image

# Confirm mtcnn was installed correctly
import mtcnn
from mtcnn.mtcnn import MTCNN
from matplotlib.patches import Rectangle

from os import listdir
from tqdm import tqdm
import pandas as pd
from sklearn.model_selection import train_test_split
import seaborn as sns

In [32]:
DIRECTORY = "../input/yale-face-database/"

In [33]:
# Read one sample image and build a three-channel copy of it by repeating
# the same pixel array along a new last axis.
filename = "../input/yale-face-database/subject01.centerlight"
pixels = plt.imread(filename)

rgb_pixels = np.stack([pixels] * 3, axis=2)
print(rgb_pixels.shape)

plt.imshow(pixels)
plt.show()

In [34]:
# Run the MTCNN face detector on the three-channel sample image.
detector = MTCNN()
results = detector.detect_faces(rgb_pixels)
# Display the raw detections; each entry carries a 'box' value that the
# drawing and cropping code below reads.
results

In [35]:
# function to find a face and draw box round it
def draw_image_with_boxes(data, result_list):
    """Show an image with a red outline around every detection.

    Args:
        data: Image passed to ``plt.imshow``.
        result_list: Iterable of detections; each one provides a ``'box'``
            entry of the form ``(x, y, width, height)``.
    """
    plt.imshow(data)
    axes = plt.gca()
    # One unfilled red rectangle per detection, created and added in order.
    outlines = (
        Rectangle((left, top), box_width, box_height, fill=False, color='red')
        for left, top, box_width, box_height in (
            detection['box'] for detection in result_list
        )
    )
    for outline in outlines:
        axes.add_patch(outline)
    plt.show()

# display faces on the original image
draw_image_with_boxes(rgb_pixels, results)

In [36]:
# extract a single face from a given photograph
def extract_face_from_file(filename, required_size=(160, 160)):
    """Open an image file and return the face extracted from it.

    Args:
        filename: Path of the image file to open with PIL.
        required_size: Size forwarded to ``extract_face``.

    Returns:
        Whatever ``extract_face`` returns for the opened image.
    """
    # The context manager closes the file once the face has been extracted,
    # so loading many images in a loop does not leave file handles open.
    with Image.open(filename) as image:
        return extract_face(image, required_size)

def extract_face(image, required_size=(160, 160)):
    """Crop the first detected face from a PIL image and return it in grayscale.

    Uses the module-level ``detector`` for face detection.

    Args:
        image: PIL image; it is converted to RGB before detection.
        required_size: Size tuple handed to ``Image.resize`` for the cropped face.

    Returns:
        The resized face crop converted to a single-channel grayscale array.

    Raises:
        ValueError: If the detector reports no face for the image.
    """
    pixels = np.asarray(image.convert('RGB'))

    results = detector.detect_faces(pixels)
    if not results:
        raise ValueError("no face detected in the supplied image")

    # Only the first detection is used.
    x1, y1, width, height = results[0]['box']
    # Compute the far edge from the raw origin, then clamp the origin to the
    # image. A negative origin means the box starts outside the picture;
    # taking abs() of it would shift the whole crop instead of trimming it.
    x2, y2 = x1 + width, y1 + height
    x1, y1 = max(x1, 0), max(y1, 0)
    face = pixels[y1:y2, x1:x2]

    # Resize the crop to the requested size.
    resized = Image.fromarray(face).resize(required_size)
    face_array = np.asarray(resized)
    # The array comes from an RGB PIL image, so use the RGB (not BGR)
    # conversion code.
    gray_face = cv2.cvtColor(face_array, cv2.COLOR_RGB2GRAY)

    return gray_face


# Create the detector, using default weights. extract_face() reads this
# module-level `detector`, so it must exist before any face is extracted.
detector = MTCNN()

# Load one photo and extract its face as a quick check of the helpers above.
face_pixels = extract_face_from_file("../input/yale-face-database/subject01.centerlight")

# Show the extracted grayscale crop.
plt.imshow(face_pixels)

In [37]:
def list_files(directory, contains):
    """Return the names in ``directory`` that contain the given substring.

    Args:
        directory: Directory to list.
        contains: Substring a name must include to be returned.

    Returns:
        The matching names as a list in sorted order. ``os.listdir`` returns
        entries in arbitrary order, so sorting keeps slices of the result and
        seeded splits built from it reproducible.
    """
    return sorted(f for f in listdir(directory) if contains in f)

In [38]:
# Extract the face from the first 16 listed files and show them in a 4x4 grid.
faces = []
preview_names = list_files(DIRECTORY, "subject")[0:16]
for grid_position, filename in enumerate(tqdm(preview_names), start=1):
    face = extract_face_from_file(DIRECTORY + filename)
    # subplot positions are 1-based
    plt.subplot(4, 4, grid_position)
    plt.axis('off')
    plt.imshow(face)
    faces.append(face)
plt.show()

In [39]:
# list filenames
filenames = pd.DataFrame(list_files(DIRECTORY, "subject"))

# Split each name on "." into one column per part.
df = filenames[0].str.split(".", expand=True)
# Keep the full file name next to its parts (assign the column, not the
# whole one-column frame).
df["filename"] = filenames[0]

# Name the first two parts and turn "subjectNN" into the integer NN.
# A previous `df.apply(pd.to_numeric, errors='coerce').dropna()` statement
# was removed here: its result was never assigned, so it had no effect.
df = df.rename(columns = {0:"subject", 1:"category"})
df['subject'] = pd.to_numeric(df['subject'].str.replace('subject', '', regex=False))
df

In [40]:
# 11 images per subject: 8 for training and the remaining 3 for testing.
PER_CLASS = 8
NO_CLASSES = 15
DS_SIZE = df["subject"].count()
# Fraction of rows left once PER_CLASS * NO_CLASSES rows are used for training.
TEST_SIZE = 1 - (PER_CLASS * NO_CLASSES / DS_SIZE)

# # list files for each group
# # df.groupby(['subject'])['filename'].apply(list)
X = df.drop(columns='subject')
y = df['subject']

# Split the file index (not the pixels), stratified by subject.
split_parts = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=45,
    stratify=y,
)
X_train_info, X_test_info = split_parts[0], split_parts[1]

# Plain Python lists of the subject labels for each split.
y_train, y_test = split_parts[2].tolist(), split_parts[3].tolist()

In [41]:
# detector = MTCNN()

def load_dataset(dataset):
    """Extract the face for every file named in ``dataset["filename"]``.

    Args:
        dataset: Mapping or frame whose ``"filename"`` entry iterates over
            file names relative to ``DIRECTORY``.

    Returns:
        A numpy array with one extracted face per file name, in the same order.
    """
    return np.asarray([
        extract_face_from_file(DIRECTORY + filename)
        for filename in tqdm(dataset["filename"])
    ])

In [42]:
# Extract the faces for both splits (test first, then train) and report
# the resulting array shapes.
X_test, X_train = (load_dataset(split_info) for split_info in (X_test_info, X_train_info))

for face_array in (X_test, X_train):
    print(face_array.shape)

In [44]:
# Options for the CNN pipeline.

# Directories read by the Keras directory generators. save_keras_dataset()
# writes its images below "data/<setname>/", which matches these paths for
# the "train" and "test" set names.
TRAINING_DATA_DIRECTORY = "data/train"
TESTING_DATA_DIRECTORY = "data/test"
# Number of subjects; used as the size of the model's output layer.
NUM_CLASSES = 15
EPOCHS = 25
# Batch size of the training generator.
BATCH_SIZE = 5
# Expected image counts; used to derive the steps per epoch during training.
NUMBER_OF_TRAINING_IMAGES = 120
NUMBER_OF_TESTING_IMAGES = 45
# Size of the face crops fed to the network.
IMAGE_HEIGHT = 160
IMAGE_WIDTH = 160

In [45]:
import os 

def save_keras_dataset(setname, dataset, labels, per_class):
    """Write images to ``data/<setname>/class_<label>/`` as PNG files.

    Files are named ``class_<label>_<k>.png`` where ``k`` counts the images
    of that label starting at 1.

    Args:
        setname: Name of the sub-directory below ``data/`` (e.g. ``"train"``).
        dataset: Sequence of images accepted by ``cv2.imwrite``.
        labels: Sequence of labels, one per image, in the same order.
        per_class: Expected number of images per label. Kept for backward
            compatibility; numbering is now tracked per label, so a label
            with more images than this no longer overwrites earlier files.
    """
    # Group images by label. The sort key is the label only, so images are
    # never compared with each other and keep their relative order.
    data = sorted(zip(labels, dataset), key=lambda pair: pair[0])

    written_per_label = {}
    for label, gray_img in tqdm(data):
        index = written_per_label.get(label, 0) + 1
        written_per_label[label] = index

        directory = f"data/{setname}/class_{label}/"
        os.makedirs(directory, exist_ok=True)
        cv2.imwrite(f"{directory}class_{label}_{index}.png",gray_img)

In [46]:
# Remove any previous export so stale images from an earlier run cannot mix
# with the new ones; ignore_errors=True makes this a no-op when "data" does
# not exist yet.
import shutil
shutil.rmtree(r'data', ignore_errors=True)

# Save datasets. The last argument is the expected number of images per
# class in each split (3 test and 8 train images per subject).
save_keras_dataset("test", X_test, y_test, 3)
save_keras_dataset("train", X_train, y_train, 8)

In [47]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
def data_generator():
    """Create the image generator shared by the train/test/validation flows.

    Returns:
        An ``ImageDataGenerator`` whose only active option rescales pixel
        values by 1/255.
    """
    generator_options = {"rescale": 1./255}
    # Augmentation options that are currently switched off:
    #     horizontal_flip=True,
    #     fill_mode="nearest",
    #     zoom_range=0.1,
    #     width_shift_range=0.1,
    #     height_shift_range=0.1,
    #     rotation_range=10,
    #     preprocessing_function=add_noise
    return ImageDataGenerator(**generator_options)

def add_noise(img):
    """Return a copy of an image with random Gaussian noise added.

    The noise standard deviation is drawn uniformly from ``[0, 35)`` for each
    call, and the noisy pixels are clipped to the range ``[0, 255]``.

    Args:
        img: Numpy array of pixel values. It is not modified.

    Returns:
        A new floating-point array with the same shape as ``img``.
    """
    # Imported locally: the notebook-level `import random` only runs in a
    # later cell than this definition.
    import random

    VARIABILITY = 35
    deviation = VARIABILITY*random.random()
    noise = np.random.normal(0, deviation, img.shape)
    # np.clip returns the clipped array rather than clipping in place, so its
    # result must be the value that is returned. Adding out of place also
    # leaves the caller's array untouched and works for integer images.
    return np.clip(img + noise, 0., 255.)

In [48]:
# Setup Data Generators
# flow_from_directory expects target_size as (height, width).
training_generator = data_generator().flow_from_directory(
    TRAINING_DATA_DIRECTORY,
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    color_mode='grayscale'
)

# Shuffled view of the test images; no batch_size is passed, so the
# generator's default applies.
testing_generator = data_generator().flow_from_directory(
    TESTING_DATA_DIRECTORY,
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='categorical',
    color_mode='grayscale'
)

# Same test images in a fixed order, used for the final predictions.
validation_generator = data_generator().flow_from_directory(
    TESTING_DATA_DIRECTORY,
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
    class_mode='categorical',
    color_mode='grayscale',
    shuffle=False # IMPORTANT: to ensure classes line up with batches
)

In [49]:
import random
# Take one batch through the iterator protocol; the built-in next() does not
# depend on the generator object exposing a .next() method. Element 0 of the
# batch holds the images.
sample_images = next(testing_generator)[0]

# Show up to nine images of the batch in a 3x3 grid (row by row). zip stops
# at the shorter input, so a batch with fewer than nine images leaves the
# remaining axes empty instead of raising an IndexError.
f, xyarr = plt.subplots(3,3)
for grid_axis, sample_image in zip(xyarr.flat, sample_images):
    grid_axis.imshow(sample_image)
plt.show()

In [50]:
import keras
class MCDropout(keras.layers.Dropout):
    """Dropout layer that always runs in training mode.

    ``call`` forwards ``training=True`` to the parent layer, so dropout stays
    active whenever the layer is called (presumably for Monte Carlo dropout,
    going by the name).
    """
    def call(self, inputs):
        # Always pass training=True so units are dropped on every call.
        return super().call(inputs, training=True)

In [52]:
from tensorflow.keras import models
from tensorflow.keras.layers import Activation, ZeroPadding2D, MaxPooling2D, Conv2D, Flatten, Dense, Dropout
from tensorflow.keras import regularizers, constraints

# Model
model = models.Sequential()

# Convolution layers
# input_shape for channels-last image data is (height, width, channels);
# the single channel matches the grayscale generators.
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 1), padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, kernel_size=(2, 2), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, kernel_size=(2, 2), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the Convolution
model.add(Flatten())

# dense layer
model.add(Dense(units = 64, activation='relu'))

# drop layer to reduce overfitting
model.add(Dropout(rate=0.2))

# Final output layer: one softmax probability per subject
model.add(Dense(NUM_CLASSES, activation='softmax'))

model.summary()

In [53]:
from tensorflow.keras import optimizers, losses
from tensorflow.keras.callbacks import EarlyStopping
# Created for optional use; it is only active if the commented-out
# `callbacks` argument below is restored.
early_stopping = EarlyStopping()

# The output layer already applies softmax, so the loss keeps its default
# of treating predictions as probabilities (no from_logits=True).
model.compile(
    loss=losses.CategoricalCrossentropy(),
    optimizer=optimizers.Adam(learning_rate=0.0003),
    metrics=["accuracy"]
)

# Take the step counts from the generators themselves. The validation
# generator does not use BATCH_SIZE, so deriving validation_steps from
# BATCH_SIZE would run it more than once per epoch. The `shuffle` argument
# is omitted because Keras ignores it for generator input; the generators
# control their own shuffling.
history = model.fit(
    training_generator,
    steps_per_epoch=len(training_generator),
    epochs=EPOCHS,
    validation_data=testing_generator,
    validation_steps=len(testing_generator),
#     callbacks=[early_stopping]
)

In [54]:
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support
from sklearn.metrics import accuracy_score

# Predict class probabilities for the test images in their fixed
# (unshuffled) order and take the most likely class of each image.
Y_pred = model.predict(validation_generator)
y_pred = np.argmax(Y_pred, axis=1)
print("classification report")
print(classification_report(validation_generator.classes, y_pred))
# The value printed here is the accuracy of this single prediction run,
# so label it as such.
print("accuracy:")
print(accuracy_score(validation_generator.classes, y_pred))

# i = 0
# average = 0
# while i < 10:
#     Y_pred = model.predict(validation_generator)
#     y_pred = np.argmax(Y_pred, axis=1)
    
#     average = average + accuracy_score(validation_generator.classes, y_pred)
#     i = i + 1
    
# average = average / 10
# print("average:")
# print(average)
