Stanislas Deneuville - Emmanuel Ferrandi - Pol Grisart - Marine Médard
# Project of data science :  Face recognition in a video and counting
16/11/2018

## Part II : Train&Use

In [55]:
import random
import keras
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

__split__ : split the set into two well mixed set 


In : 
* data : list of all the images 

Out : 
* train_set : list of the images in the training set representing 66% of data
* test_set : list of the images in the test set representing the other 34% of data

In [56]:
TRAIN_TEST_RATIO = 0.66
def split(data:np.ndarray, train_test_ratio:int=TRAIN_TEST_RATIO, random_split=True) :
    if random_split:
        # Shuffle
        np.random.shuffle(data)
    
    # Split data
    cut_index = round(data.shape[0] * train_test_ratio) 
    train_set = data[:cut_index, :]
    test_set = data[cut_index:, :]
    return(train_set, test_set)

In [57]:
MAX_CATEGORY = 14

def equalize(subsets):
    # TODO
    return subsets

def load_and_split():
    subsets = []
    for nb_face in range(MAX_CATEGORY+1):
        folder_path = os.path.join("train_set", str(nb_face))
        if (os.path.isdir(folder_path)) :

            # Y value of these alements
            categorical_y = np.zeros((1, MAX_CATEGORY+1))
            categorical_y[0, nb_face] = 1

            subsets.append(np.zeros((0, 50*50 + MAX_CATEGORY+1)))

            for filename in os.listdir(folder_path):
                # Filter non image files
                if ".jpeg" in filename or ".png" in filename or ".jpg" in filename:
                    x = plt.imread(os.path.join(folder_path, filename)).reshape((1, 50*50))
                    y = categorical_y
                    xy = np.concatenate((x,y), axis=1)
                    print(nb_face)
                    subsets[nb_face] = np.concatenate((subsets[nb_face], xy), axis=0)
    
    # Equalize to have the same number of each Y value
    equalized_subsets = equalize(subsets)
    
    # Split data respecting equalization
    train_set, test_set = split(equalized_subsets[0])
    for subset in equalized_subsets[1:]:
        add_train_set, add_test_set = split(subset)
        train_set = np.concatenate((train_set, add_train_set), axis=0)
        test_set = np.concatenate((test_set, add_test_set), axis=0)
    
    # Shuffle
    random.shuffle(train_set)
    random.shuffle(test_set)
    
    print("Train size = {}, test size = {}".format(train_set.shape, test_set.shape))
    
    return train_set, test_set
        

__train_neuural_network__ : function that creates a neural network and trains it with the train_set 

In [58]:
def generate_model():
    # Simple model
    model = keras.models.Sequential()
    
    # Add neurone layers
    model.add(keras.layers.Dense(units=500, activation='relu', input_dim=50*50))
    model.add(keras.layers.Dense(units=100, activation='relu'))
    model.add(keras.layers.Dense(units=40, activation='relu'))
    model.add(keras.layers.Dense(units=15, activation='relu'))
    
    # Learning process
    model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
    return model 

def train_neural_network(train_set):
    print("Generating model")
    model = generate_model()
    
    x_train = train_set[:, :50*50]
    y_train = train_set[:, 50*50:]
    
    print("Start training")
    print(x_train)
    print(y_train)
    model.fit(x_train, y_train, epochs=5, batch_size=32)
    
    return model

__evaluate_performance__ : function that applys the neural network on the images in the test_set and compare with the real number of faces in these images 

In [59]:
def evaluate_performance(model, test_set):
    
    x_test = test_set[:, :50*50]
    y_test = test_set[:, 50*50:]
    
    loss_and_metrics = model.evaluate(x_test, y_test, batch_size=128)
    

In [60]:
def save_model(model) :
    # serialize model to JSON
    model_json = model.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights("model.h5")
    print("Saved model to disk")
 



In [61]:
print("Split")
train_set, test_set = load_and_split()

print("Train")
model = train_neural_network(train_set)

print("Save")
save_model(model)

print("Evaluate")
evaluate_performance(model, test_set)

Split
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
4
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
6
7
7
7
7
7
7
7
7
7
7
7
7
7
7
7
8
8
8
8
8
8
8
8
8
8
8
8
9
9
9
10
10
11
11
11
11
11
12
12
12
14


IndexError: list index out of range