# Face identification

### Inport needed libraries

For clarity, and to avoid problems, firstly include all needed libraries at the begining of the notebook. Import all needed libraries.

In [0]:
# Import general purpose python libraries
import os
import matplotlib.pyplot as plt
from PIL import Image # For handling the images
import numpy as np
import math

# Import different Keras functionalities
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
#from keras.layers import concatenate
#from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
K.common.image_dim_ordering()

from keras.models import Model
from keras.layers import Input, Dense
import keras.backend.tensorflow_backend as K2
from keras.models import load_model
import tensorflow as tf

from keras.applications.resnet50 import ResNet50

from pathlib import Path

import matplotlib.pyplot as plt

from keras.preprocessing.image import ImageDataGenerator 

# Import function to plot the confussion matrix
#import plotcm

### Database samples

The face database is in folder "MIT-CBCL". Database images are split into "train", "test", and "val", folders. Each of those three folders is composed of 10 different folders, each of those folders contains images of each subject. Images do not have the same size but most of them are close to 150x150 pixels. With the next lines you can see a sample image of each subject in the training folder.

In [5]:
path_subjects = "./MIT-CBCL/train"
for i in os.listdir(path_subjects):
    count = 0
    for j in os.listdir(path_subjects + '/' + str(i)):
        if count==0:
            count = 1
            print(path_subjects + '/' + str(i) + '/' + str(j))

            img = Image.open(path_subjects + '/' + str(i) + '/' + str(j))
            plt.imshow(img, cmap='gray', vmin=0, vmax=255)
            plt.title('subject: ' + str(i))
            plt.show()

FileNotFoundError: ignored

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


### Configuration parameters
Configuration values of different parts of the solution. You should change some of them to obtain better results.

In [0]:
# Randomize the initial network weights
random_seed = True

# Parameters that characterizes the images, size and image type
img_width = 150
img_height = 150
img_mode = "grayscale" #Load mode for images, either rgb or grayscale. In our case although some images could be rgb,
                       #we are going to work with grayscale images

# Parameters that configures the training process
batch_size = 1 # Batch size
epochs = 1 # Number of epochs
initial_epoch = 0 # Initial epoch, it can be greater than 0 if you want to contiue a previous training process
initial_lr = 1e-10 # Learning rate

# Paths to where training, testing, and validation images are
database_dir = './MIT-CBCL'
train_dir = './MIT-CBCL/train'
val_dir = './MIT-CBCL/val'
test_dir = './MIT-CBCL/test'

# Directory where to store weights of the model and results
experiment_rootdir = "./test/"
# Create experiment directory if it does not exists
if not os.path.exists(experiment_rootdir):
    os.makedirs(experiment_rootdir)

weights_path = "weights.h5" # Name of the file to store the weights

# Output dimension (number of sublects in our problem)
num_classes = 10

# Name of each gesture of the database
CLASSES = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

# Training process

### Create the model
Here you should introduce your solution for the model that obtains better results.

In [0]:
def getModel(img_width, img_height, img_channels, output_dim, weights_path):
    """
    Initialize model.

    # Arguments
       img_width: Target image widht.
       img_height: Target image height.
       img_channels: Target image channels.
       output_dim: Dimension of model output (number of classes).
       weights_path: Path to pre-trained model.

    # Returns
       model: A Model instance.
    """
    
    # Define the input shape indicating the width, heigh, and depth of the images
    input_image = (img_width,img_height,img_channels)
    # Create the model itself
    restnet = ResNet50(include_top= True, weights= None, input_shape=input_image, classes=output_dim)
     
    #out= Dense(output_dim, activation='softmax')(restnet)

    #model=model.add(Dense(512, activation='relu', input_dim=input_shape))
    #model = Model(inputs=input_image, output=out);
    
    # Load pretrained model if it exists
    if weights_path:
        try:
            restnet.load_weights(weights_path)
            print("Loaded model from {}".format(weights_path))
        except:
            print("Impossible to find weight path. Returning untrained model")

    # Return the model itself
    return restnet

### Set model training process
Includes the compiles, which you can modify, a callback to just save the model if the validation loss decreases, and fits the model.

In [0]:
def trainModel(train_data_generator, val_data_generator, model, initial_epoch, initial_lr, experiment_rootdir, batch_size, epochs, weights_path):
    """
    Model training.

    # Arguments
       train_data_generator: Training data generated batch by batch.
       val_data_generator: Validation data generated batch by batch.
       model: A Model instance.
       initial_epoch: Epoch from which training starts.
       
    # Returns
        history: Model history
    
    """
    
    # Configure the trainig process by compiling the model. Select the loss fucntion, the optimizer, and the metric used to obtain results.
    sgd = SGD(lr=initial_lr, momentum=0.9, decay=0, nesterov=False)

    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    #print(model.summary())
    
    # Define training and validation steps taking into account the number of samples for each process and the batch size
    steps_per_epoch = math.floor(train_data_generator.samples / batch_size)
    validation_steps = math.floor(val_data_generator.samples / batch_size)
        
    # Fit the model by using the fit generator
    model.fit_generator(train_data_generator,steps_per_epoch= steps_per_epoch, validation_data=val_data_generator,  validation_steps=validation_steps, epochs = epochs)
    
    # Return the history of the model to plot the loss and accuracy evolution
    history = model.fit_generator(train_data_generator, val_data_generator, epochs = epochs)
    
    
                

In [0]:
math.floor(train_generator.samples / batch_size)

1200

### Load training and validation data
Loads training and validation data in a DataGenerator which divides the data in batches and prepares it for the training process.

In [0]:
# Set random seed
if random_seed:
    seed = np.random.randint(0,2*31-1)
else:
    seed = 5
np.random.seed(seed)
tf.set_random_seed(seed)

# Select the number of channels of the image considering the image mode (RGB or grayscale)
if img_mode=='rgb':
    img_channels = 3
elif img_mode == 'grayscale':
    img_channels = 1
else:
    raise IOError("Unidentified image mode: use 'grayscale' or 'rgb'")

# Create train_datagenerator using ImageDataGenerator of keras.

train_data_generator = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True, rescale =1./255)

# Iterator object containing training data to be generated batch by batch

train_generator=train_data_generator.flow_from_directory(train_dir, target_size=(150, 150), color_mode='grayscale')

# Check if the number of classes in dataset corresponds to the one specified                                                    
assert train_generator.num_classes == num_classes, \
                    " Not macthing output dimensions in training data."                                                    


# Create val_datagenerator using ImageDataGenerator of keras.

val_data_generator = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True, rescale =1./255)

# Iterator object containing validation data to be generated batch by batch

val_generator=val_data_generator.flow_from_directory(val_dir, target_size=(150, 150), color_mode='grayscale')

# Check if the number of classes in dataset corresponds to the one specified
assert val_generator.num_classes == num_classes, \
                    " Not macthing output dimensions in validation data."

Found 1200 images belonging to 10 classes.
Found 400 images belonging to 10 classes.


### Obtain and train the model itself
Load the model that you have specificly created and trains it

In [0]:
# Create the model by using the getModel function
trained_model=getModel(img_width, img_height, img_channels, num_classes, weights_path)

# Train the model by using the trainModel function
trained_model=trainModel(train_generator, val_generator, trained_model, initial_epoch, initial_lr, experiment_rootdir, batch_size, epochs, weights_path)

# Save weights that can be used in future training process
weights_save_path = os.path.join(experiment_rootdir, weights_path)
trained_model.save_weights(weights_save_path)

Impossible to find weight path. Returning untrained model




Epoch 1/1
Epoch 1/1


TypeError: '<' not supported between instances of 'int' and 'DirectoryIterator'

# Testing

### Predictions computation
Function to obtain the predictions over the testing data, it also outputs the ground truth of the input data

### Load testing data

In [0]:
# Create test_datagenerator using ImageDataGenerator of keras.
test_data_generator = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True, rescale =1./255)

# Iterator object containing validation data to be generated batch by batch
test_generator=test_data_generator.flow_from_directory(test_dir, target_size=(150, 150), color_mode='grayscale')

# Check if the number of classes in dataset corresponds to the one specified
assert test_generator.num_classes == num_classes, \
                    " Not macthing output dimensions in test data."

Found 400 images belonging to 10 classes.


### Testing process
Load the model, load the weight obtained by the training process, obtain testing results and plot those results in a confusion matrix.

In [0]:
# Create the model by using the getModel function
test_model=getModel(img_width, img_height, img_channels, num_classes, weights_path)


# Load saved weights
weights_load_path = os.path.join(experiment_rootdir, weights_path)

try:
    test_model.load_weights(weights_load_path)
    print("Loaded model from {}".format(weights_load_path))
except:
    print("Impossible to find weight path. Returning untrained model")


# Compile model
test_model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Get predictions using predict_generator function
predicted_values=predict_generator(test_generator, steps=test_generator.samples//batch_size)

# Select the class which has the higher predicted value
max_class= max(predicted_values)

# Create groundtruth
gt = np.zeros(pred_labels.shape)
init_index = 0
end_index = 0
user_id = 0
for users in sorted(os.listdir(test_dir)):
    user_id = user_id + 1
    if os.path.isdir(os.path.join(test_dir, users)):
        user_path = os.path.join(test_dir, users)
        for root, _, files in sorted(os.walk(user_path)):
            num_images_user = len(files)
            end_index = init_index + num_images_user
            gt[init_index:end_index] = user_id - 1
            init_index = end_index
            
# Evaluate predictions: Average accuracy and highest errors
print("-----------------------------------------------")
print("Evaluation:")
# Compute average accuracy
ave_accuracy = metrics.accuracy_score(gt, pred_labels)
print('Average accuracy = ', ave_accuracy)
print("-----------------------------------------------")

# Visualize confusion matrix                                           
plotcm.plotcm(experiment_rootdir, gt, pred_labels,CLASSES, experiment_rootdir, normalize=True)

Impossible to find weight path. Returning untrained model
Impossible to find weight path. Returning untrained model


NameError: name 'sgd' is not defined

### Plot history for accuracy for training and validation process

In [0]:
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')

# Save the figure
plt.savefig(accuracy_img_name)

# Show figure
plt.show()

### Plot history for loss for training and validation process

In [0]:
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')

# Save the figure
plt.savefig(loss_img_name)

# Show figure
plt.show()