# Imports and Initial Drive Mount


In [1]:
# Install the required libraries (uncomment the lines below in a fresh environment)
# !pip install -U tensorflow-addons
# !pip install facenet-pytorch


In [2]:
import os
# Mount drive
# from google.colab import drive
# drive.mount("/content/drive")
# path = "/content/drive/My Drive/Colab Notebooks/face_recognize"
# os.chdir(path)

import time
import tensorflow_addons as tfa
import tensorflow as tf
from tensorflow.keras import models, layers, metrics, optimizers, Model
from tensorflow.keras.layers import Input, Dense, BatchNormalization
from functools import partial
import matplotlib.pyplot as plt
import numpy as np
import cv2
import math
import io
import pickle
import tensorflow_datasets as tfds
import random
import csv

from train_tensorflow.inceptionresnetv1 import InceptionResNetV1
from train_tensorflow.models import convert_model_to_embedding,\
    LayerBeforeArcFace, ArcFaceLoss, \
    call_instance_model, convert_dense_layer_to_arcface,\
    special_convert_dense_layer_to_arcface
from train_tensorflow.Classify import Classify
from tool.FormatFunction import FormatFunction
from tool.FileFunction import FileFunction
from tool.GlobalValue import GlobalValue


In [3]:
# Report how many GPUs TensorFlow can see before starting any training
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))


Num GPUs Available:  1


# Train

## Initial values

In [4]:
# ---- Run switches ----
# True: re-scan the dataset folders and rewrite the pickled label dictionary
# and image-path lists before they are loaded.
READ_RAW_DATA_THEN_SAVE = False
# True: build the model through special_convert_dense_layer_to_arcface
# instead of resuming from the latest saved epoch.
is_convert = False

# ---- Model identity ----
backbone_type = "InceptionResNetV2"
head_type = "Dense"
# The name doubles as the checkpoint folder name and the CSV log file name.
MODEL_NAME = "110-64-{}-{}".format(backbone_type, head_type)
path_save_model = os.path.join(os.getcwd(), "save_model", MODEL_NAME)

# ---- Shared settings and helpers ----
global_value = GlobalValue(
    image_size=[80, 80],
    batch_size=64,
    shuffle_size=1000,
    ratio_train=0.9,
    ratio_test=0.1,
    ratio_valid=0.0,
    epochs=40,
    small_epochs=50,
    image_each_class=15,
)
format_function = FormatFunction(global_value)
file_function = FileFunction()


## Prepare folders and cached data

In [5]:
# Create the folder that receives the saved model weights.
# exist_ok=True keeps the cell re-runnable and avoids a check-then-create race.
os.makedirs(path_save_model, exist_ok=True)

# Folders used below: raw datasets and the pickled caches built from them.
dataset_dir = os.path.join(os.getcwd(), "dataset")
cache_data_dir = os.path.join(os.getcwd(), "cache", "data")

if READ_RAW_DATA_THEN_SAVE:
    # open(..., 'wb') does not create parent folders, so make sure the cache
    # folder exists before any pickle is written (e.g. on a fresh checkout).
    os.makedirs(cache_data_dir, exist_ok=True)

# Read the label dictionary (keyed by person name, not by image path).
# It is indexed with the parent-folder name of each image during training.
path = os.path.join(cache_data_dir, "label_dict.pkl")
if READ_RAW_DATA_THEN_SAVE:
    label_dict = dict()
    for dataset_name in ("CASIA_align", "AFDB"):
        label_dict.update(format_function.get_label_dict(
            os.path.join(dataset_dir, dataset_name)))
    with open(path, 'wb') as file:
        pickle.dump(label_dict, file)
# NOTE: pickle.load can execute arbitrary code; only load cache files that
# were written by this notebook, never files from an untrusted source.
with open(path, 'rb') as f:
    label_dict = pickle.load(f)

# Save the image paths to pickle files so later runs can read them faster
if READ_RAW_DATA_THEN_SAVE:
    # Images without a mask
    path_image_no_mask = list()
    for dataset_name in ("CASIA_align", "AFDB"):
        path_image_no_mask.extend(file_function.get_data_path_by_dictionary(
            os.path.join(dataset_dir, dataset_name)))
    saved_path = os.path.join(cache_data_dir, "path_image_no_mask.pkl")
    with open(saved_path, 'wb') as file:
        pickle.dump(path_image_no_mask, file)

    # Images with a mask
    path_image_mask = list()
    for dataset_name in ("CASIA_mask", "AFDB_mask"):
        path_image_mask.extend(file_function.get_data_path_by_dictionary(
            os.path.join(dataset_dir, dataset_name)))
    saved_path = os.path.join(cache_data_dir, "path_image_mask.pkl")
    with open(saved_path, 'wb') as file:
        pickle.dump(path_image_mask, file)


## Start train

# Train version 1


In [6]:
# Create embedding model
# tf.config.run_functions_eagerly(True)
input_shape = (global_value.IMAGE_SIZE[0], global_value.IMAGE_SIZE[1], 3)
# NOTE(review): `input` shadows the Python builtin and is not used again in
# this cell; it is kept only in case another cell reads it.
input = Input(shape=input_shape)
model = call_instance_model(input_shape, num_classes=len(label_dict), embd_shape=512, head_type=head_type,
                            backbone_type=backbone_type, use_pretrain=False, name="facenet")

# Number of the next epoch to train; stays 1 when no saved epoch is found.
actual_epochs = 1
if is_convert:
    # Build the model from previously saved Dense-head weights.
    # NOTE(review): the loss below is still chosen from head_type, so this
    # branch presumably expects head_type == 'ArcFace' -- confirm.
    path_dense = os.path.join(os.getcwd(), "save_model", "160-64-InceptionResNetV1-Dense(v1)",
                              "epoch35.h5")
    # The original passed an undefined lowercase `model_name` (NameError on a
    # fresh kernel). MODEL_NAME is the only matching configured value --
    # TODO confirm this is what the helper expects.
    model = special_convert_dense_layer_to_arcface(path_dense, input_shape,
                                                   len(label_dict), 512, model_name=MODEL_NAME)
else:
    # ----find the latest epoch: advance until the first missing weight file
    while os.path.exists(os.path.join(
            path_save_model, "epoch{}.h5".format(actual_epochs))):
        actual_epochs += 1

    # Load saved model (resume from the most recent epoch, if there is one)
    if actual_epochs != 1:
        load_path = os.path.join(
            path_save_model, "epoch{}.h5".format(actual_epochs-1))
        print("load ", load_path)
        model.load_weights(load_path)

# Pick the loss that matches the configured head.
if head_type == 'ArcFace':
    loss_function = ArcFaceLoss(scale=30)
elif head_type == "Dense":
    loss_function = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True)
else:
    # Fail loudly: otherwise loss_function is undefined, or worse, silently
    # reused from an earlier execution of this cell in the same kernel.
    raise ValueError(
        "Unsupported head_type: {!r} (expected 'ArcFace' or 'Dense')".format(head_type))
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=loss_function,
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
)
# model.summary()
# tf.keras.utils.plot_model(model, to_file="model.png", show_shapes=True)

# Normal train network
# Each pass of the loop is one "big epoch": rebuild the dataset from the cached
# path lists, fit for one Keras epoch, save the weights and append a CSV row.

# Create the log folder up front: open(..., "a") does not create parent
# folders, and a failure there would only surface after a full epoch.
os.makedirs(os.path.join(os.getcwd(), "cache", "log"), exist_ok=True)

for i in range(global_value.EPOCHS):
    # Measure time
    now = time.time()

    # Read data path from file and limit it with IMAGE_EACH_CLASS.
    # This is repeated every epoch (presumably so that
    # get_data_path_with_limit can pick a different subset -- confirm).
    # NOTE: pickle.load must only be used on cache files written by this notebook.
    cache_file = os.path.join(os.getcwd(), "cache", "data", "path_image_no_mask.pkl")
    with open(cache_file, 'rb') as f:
        path_image_no_mask = pickle.load(f)
    path_image_no_mask = file_function.get_data_path_with_limit(
        path_image_no_mask, global_value.IMAGE_EACH_CLASS)

    cache_file = os.path.join(os.getcwd(), "cache", "data", "path_image_mask.pkl")
    with open(cache_file, 'rb') as f:
        path_image_mask = pickle.load(f)
    path_image_mask = file_function.get_data_path_with_limit(
        path_image_mask, global_value.IMAGE_EACH_CLASS)

    # Combine data path: from here on path_image_no_mask holds BOTH the
    # unmasked and the masked image paths, in random order.
    path_image_no_mask.extend(path_image_mask)
    random.shuffle(path_image_no_mask)

    # Index label (change label of data from string to number).
    # The label string is the name of the folder that contains the image.
    label_index = [label_dict[image_path.split(os.sep)[-2]]
                   for image_path in path_image_no_mask]
    path_dataset = tf.data.Dataset.from_tensor_slices(path_image_no_mask)
    label_dataset = tf.data.Dataset.from_tensor_slices(label_index)
    origin_dataset = tf.data.Dataset.zip((path_dataset, label_dataset))

    # Repeat data
    # origin_dataset  = origin_dataset.shuffle(global_value.SHUFFLE_SIZE).repeat(2)

    # Split train, test dataset
    train_dataset, test_dataset, _ = format_function.get_dataset_partition(
        origin_dataset, 0.9, 0.1, 0)

    # read data from path
    train_dataset = train_dataset.map(
        format_function.process_image, num_parallel_calls=tf.data.AUTOTUNE)
    test_dataset = test_dataset.map(
        format_function.process_image, num_parallel_calls=tf.data.AUTOTUNE)

    # augmentation data(flip, rotate,...)
    # NOTE(review): the validation split is augmented as well, so the val_*
    # metrics are measured on augmented images -- confirm this is intended.
    train_dataset = train_dataset.map(
        format_function.augment_data, num_parallel_calls=tf.data.AUTOTUNE)
    test_dataset = test_dataset.map(
        format_function.augment_data, num_parallel_calls=tf.data.AUTOTUNE)

    # batch data
    train_dataset = train_dataset.batch(global_value.BATCH_SIZE)
    test_dataset = test_dataset.batch(global_value.BATCH_SIZE)

    # Prefetch to improve performance (same AUTOTUNE constant as the map calls above)
    train_dataset = train_dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    test_dataset = test_dataset.prefetch(buffer_size=tf.data.AUTOTUNE)

    print("--------------------------big epoch {}--------------------------".format(actual_epochs))
    history = model.fit(
        train_dataset,
        epochs=1,
        validation_data=test_dataset
    )
    model.save_weights(os.path.join(
        path_save_model, "epoch{}.h5".format(actual_epochs)))

    # Append one row per big epoch: epoch number, the history lists for
    # loss / accuracy / val_loss / val_accuracy, and the elapsed seconds.
    # The history values are written as lists to keep the existing log format.
    with open(os.path.join(os.getcwd(), "cache", "log", MODEL_NAME+".csv"), "a", newline='') as f:
        row = [actual_epochs, history.history['loss'], history.history['sparse_categorical_accuracy'],
               history.history['val_loss'], history.history['val_sparse_categorical_accuracy'], time.time() - now]
        writer = csv.writer(f)
        writer.writerow(row)
    actual_epochs += 1
    # https://stackoverflow.com/questions/64734474/how-to-calculate-accuracy-for-facial-recognition-system


--------------------------big epoch 1--------------------------
 341/4944 [=>............................] - ETA: 31:03 - loss: 10.5219 - sparse_categorical_accuracy: 9.1642e-05