# STAT59800 Minichallenge

### Install the packages required
run only once when you first launch the container

In [None]:
# install required packages
# pip package upgrade (for opencv download)
# tensorflow, keras, jupyter (ipykernel) are already installed in this container
# only run once when setting up an environment

# pip upgrade
!pip install --upgrade pip setuptools wheel
!pip install --upgrade pip

# install the dependencies of VGGFace2
!pip install git+https://github.com/yaledhlab/vggface.git
    
# this part will takes more than 5mins (check "opencv-python\nSuccessfully installed opencv-python-4.9.0.80")
import subprocess

cmd = ["apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub", \
       "apt-get update", "apt-get install -y libgl1", "pip install opencv-python"]

for sub_cmd in cmd:
    runcmd = subprocess.Popen(sub_cmd, stdout = subprocess.PIPE, shell = True)
    [out, err] = runcmd.communicate()
    print(out)

# pip install other packages
!pip install natsort 
!pip install mtcnn 
!pip install pandas 
!pip install tqdm
!pip install keras_applications
!pip install scikit-learn
!pip install datatable
!pip install scikit-image

### Load the packages required for the script
load the packages for running the script and checking GPU status

In [1]:
# common
import os
from tqdm import tqdm
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
import datatable as dt

# pre-processing (face-recognition)
import cv2
import glob
import natsort
from PIL import Image
from mtcnn import MTCNN
from itertools import combinations
import json
from sklearn.model_selection import train_test_split

# face-identification
from keras_vggface.vggface import VGGFace
from keras_vggface import utils
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers.experimental.preprocessing \
import RandomTranslation, RandomRotation, RandomFlip, RandomZoom, RandomContrast
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.losses import SparseCategoricalCrossentropy, CategoricalCrossentropy

# evaluation & training
from tensorflow.keras.optimizers.schedules import ExponentialDecay, CosineDecay, CosineDecayRestarts
from tensorflow.keras.models import load_model

# miscellaneous
from tensorflow.python.client import device_lib
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from skimage.metrics import structural_similarity
import copy

In [None]:
# uncomment to restrict which GPU is visible (presumably must run before TensorFlow touches the GPU - TODO confirm)
# os.environ['CUDA_VISIBLE_DEVICES'] = "0"
# print the local devices reported by tensorflow's device_lib
print(device_lib.list_local_devices())

### Pre-processing of the images and labels
create the directories for saving the processed images \
using MTCNN to crop faces in the given images \
discard and classify problematic images separately \
(problematic images: unreadable, undetectable, multi-face/low confidence, low similarity) \
pre-process the test samples

In [2]:
# enable dir_empty option if want to delete the existing classified images
def create_preprocess_path(train_img_process_path, \
            train_img_suspect_path, train_img_lowconf_path, train_img_multiface_path, train_img_lowsimilar_path, \
            train_img_unprocess_path, train_img_unread_path, train_img_undetect_path, \
            test_img_process_path, test_img_unprocess_path, test_img_suspect_path, \
            train_img_data_path, test_img_data_path, dir_empty = False, parent_dir = r'tf'):
    """Create every directory used by the image pre-processing step.

    Parameters
    ----------
    train_img_process_path, train_img_suspect_path, train_img_unprocess_path,
    test_img_process_path, test_img_unprocess_path, test_img_suspect_path,
    train_img_data_path, test_img_data_path : str
        Root directories; each one is created if it does not exist.
    train_img_lowconf_path, train_img_multiface_path, train_img_lowsimilar_path : str
        Sub-directory names created inside ``train_img_suspect_path``.
    train_img_unread_path, train_img_undetect_path : str
        Sub-directory names created inside ``train_img_unprocess_path``.
    dir_empty : bool
        When True, the root directories are deleted (with their content)
        before being re-created.
    parent_dir : str
        Accepted for backward compatibility; not used by this function.
    """

    if dir_empty:
        stale_roots = (train_img_process_path, train_img_suspect_path, train_img_unprocess_path, \
                       test_img_process_path, test_img_unprocess_path, test_img_suspect_path, \
                       train_img_data_path, test_img_data_path)

        for stale_root in stale_roots:
            try:
                shutil.rmtree(stale_root)
            except FileNotFoundError:
                # nothing to delete yet (first run)
                pass
            except OSError as err:
                # best effort: keep going, but do not hide the reason
                print("WARNING: could not remove '{}': {}".format(stale_root, err))

    # create the directories if they are not existing
    required_dirs = (train_img_process_path, \
                     os.path.join(train_img_unprocess_path,train_img_unread_path), \
                     os.path.join(train_img_unprocess_path,train_img_undetect_path), \
                     os.path.join(train_img_suspect_path,train_img_lowconf_path), \
                     os.path.join(train_img_suspect_path,train_img_multiface_path), \
                     os.path.join(train_img_suspect_path,train_img_lowsimilar_path), \
                     test_img_process_path, test_img_unprocess_path, test_img_suspect_path, \
                     train_img_data_path, test_img_data_path)

    for required_dir in required_dirs:
        # exist_ok avoids the check-then-create race of os.path.exists + makedirs
        os.makedirs(required_dir, exist_ok = True)

# create directory for saving training data
def create_train_path(training_results_path, \
            training_model_save_path, training_results_ckpt_path, \
            dir_empty = False, parent_dir = r'tf'):
    """Create the directories that receive the training outputs.

    Parameters
    ----------
    training_results_path : str
        Root directory of the training results.
    training_model_save_path, training_results_ckpt_path : str
        Sub-directories created inside ``training_results_path``.
    dir_empty : bool
        When True, ``training_results_path`` is deleted (with its content)
        before the sub-directories are re-created.
    parent_dir : str
        Accepted for backward compatibility; not used by this function.
    """

    if dir_empty:
        try:
            shutil.rmtree(training_results_path)
        except FileNotFoundError:
            # nothing to delete yet (first run)
            pass
        except OSError as err:
            # best effort: keep going, but do not hide the reason
            print("WARNING: could not remove '{}': {}".format(training_results_path, err))

    # create the directories if they are not existing
    for sub_dir in (training_model_save_path, training_results_ckpt_path):
        os.makedirs(os.path.join(training_results_path,sub_dir), exist_ok = True)
        
# crop faces from the image using the result of MTCNN
def cropface(frame, results, idx, target_size = (224,224)):
    """Crop one detected face out of ``frame`` and resize it.

    Parameters
    ----------
    frame : numpy array indexed as [row, column, ...]
        Image the detection was run on.
    results : sequence of dict
        Detections; ``results[idx]['box']`` is read as (x, y, width, height)
        of the bounding box, with (x, y) its top-left corner.
    idx : int
        Which detection to crop.
    target_size : tuple of int
        ``dsize`` passed to ``cv2.resize``; defaults to the (224, 224) input
        size used for the VGGFace2 model.

    Returns
    -------
    The resized crop returned by ``cv2.resize``.

    Raises
    ------
    ValueError
        If the bounding box does not overlap the image at all.
    """

    x_left, y_top, box_width, box_height = results[idx]['box']
    frame_height, frame_width = frame.shape[:2]

    # the detector can report boxes that stick out of the image; a negative
    # start would make the numpy slice wrap around, so clamp to the image
    x_start = max(0, x_left)
    y_start = max(0, y_top)
    x_stop = min(frame_width, x_left+box_width)
    y_stop = min(frame_height, y_top+box_height)

    if x_stop <= x_start or y_stop <= y_start:
        raise ValueError("bounding box {} does not overlap the image".format(results[idx]['box']))

    # get the bounding box image for the detected face
    raw_face = frame[y_start:y_stop, x_start:x_stop]

    # resize the image to conform the input size of VGGFace2 model
    return cv2.resize(raw_face, dsize = target_size, interpolation = cv2.INTER_LINEAR)

In [3]:
# change the current working directory to the docker workspace root;
# every path below is relative to it
docker_parent_dir = r'/tf'
os.chdir(docker_parent_dir)

# pandas options
# ('mode.chained_assignment' = None silences the chained-assignment warning;
#  full option names are used because abbreviated keys are matched by regex
#  and can become ambiguous when pandas adds new options)
pd.set_option('mode.chained_assignment', None)
pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', 20)

# label data path
label_dict_path = 'ws/labels/category.csv'
train_label_path = 'ws/labels/train.csv'
test_label_path = 'ws/labels'

# raw image path
train_img_raw_path = r'ws/images/train'
test_img_raw_path = r'ws/images/test'

# sample save path (roots)
train_img_process_path = r'ws/images/train_crop_success'
train_img_unprocess_path = r'ws/images/train_crop_fail'
train_img_suspect_path = r'ws/images/train_crop_suspect'
test_img_process_path = 'ws/images/test_ready'
test_img_suspect_path = 'ws/images/test_suspect'
test_img_unprocess_path = 'ws/images/test_process_fail'

# sample save path (sub-directories, one per problem category)
train_img_unread_path = r'cat1'       # unreadable image
train_img_undetect_path = r'cat2'     # no face detected
train_img_lowconf_path = r'cat3'      # low detection confidence
train_img_multiface_path = r'cat4'    # multiple faces
train_img_lowsimilar_path = r'cat5'   # low similarity

# processed data folder
train_img_data_path = r'ws/data/train'
test_img_data_path = r'ws/data/test'

# haar cascade algorithm weights
cascade_filename = 'ws/src/haarcascade_profileface.xml'

# training results path
training_results_path = r'ws/train_results/'
training_model_save_path = r'train/model/'
training_results_ckpt_path = r'train/checkpoint/'

# create directory for saving the pre-processed data
create_preprocess_path(train_img_process_path, \
            train_img_suspect_path, train_img_lowconf_path, train_img_multiface_path, train_img_lowsimilar_path, \
            train_img_unprocess_path, train_img_unread_path, train_img_undetect_path, \
            test_img_process_path, test_img_unprocess_path, test_img_suspect_path, \
            train_img_data_path, test_img_data_path, dir_empty = False, parent_dir = docker_parent_dir)

# create directory for saving the training results
create_train_path(training_results_path, \
            training_model_save_path, training_results_ckpt_path, \
            dir_empty = False, parent_dir = docker_parent_dir)

In [None]:
# the pretrained feature extraction network of VGGFace2 dictates the image size,
# so load it first and read the shape / dtype of its input layer
feature_extract_net = VGGFace(model='resnet50', include_top = False, input_shape = (224, 224, 3), pooling = 'avg')

config_in = feature_extract_net.layers[0].input
input_dim = config_in.shape
input_dtype = config_in.dtype

# read the category dictionary using datatable package;
# the row whose index is null carries the real column name
label_type_dt = dt.fread(label_dict_path, encoding = 'utf-8')
label_type = label_type_dt.to_pandas().set_index('C0')
label_type_header = label_type.loc[label_type.index.isnull()]
label_type = label_type.rename(columns = {'C1': label_type_header.iloc[0,0]})
label_type = label_type.loc[label_type.index.notnull()]
label_type.index = label_type.index.astype('int64')
label_type.index.names = [None]

# read the training labels the same way (two named columns here)
train_label_dt = dt.fread(train_label_path, encoding = 'utf-8')
train_label = train_label_dt.to_pandas().set_index('C0')
train_label_header = train_label.loc[train_label.index.isnull()]
train_label = train_label.rename(columns = {'C1': train_label_header.iloc[0,0]})
train_label = train_label.rename(columns = {'C2': train_label_header.iloc[0,1]})
train_label = train_label.loc[train_label.index.notnull()]
train_label.index = train_label.index.astype('int64')
train_label.index.names = [None]

# invert the category table (index -> name) into a name -> index dictionary
# and use it to encode the category column of the training labels
label_dict = {name: code for code, name in label_type.Category.to_dict().items()}
train_label.columns = [col_name.replace(' ','_') for col_name in train_label.columns]
train_label['Category'] = train_label['Category'].map(label_dict).astype(np.uint8)

# create the status label to show the image status
# (problematic images: unreadable, undetectable, multiface/low confidence, low similarity)
train_label['Status'] = np.zeros(len(train_label), dtype = np.uint8)

# import *.jpg image list from the selected folder (natural sort order)
train_img_raw_list = natsort.natsorted(glob.glob(os.path.join(train_img_raw_path,"*.jpg")))

# the loop below pairs image i with label row i, so the counts must agree
if len(train_img_raw_list) != train_label.shape[0]:
    raise ValueError("found {} *.jpg images in '{}' but {} label rows".format( \
                     len(train_img_raw_list), train_img_raw_path, train_label.shape[0]))

# initialize MTCNN
face_detector = MTCNN(min_face_size = 30, scale_factor = 0.809)

# set the indices range to implement the pre-processing
process_start = 0
process_end = len(train_label)

# create the numpy array to store the cropped_image (load part of it for test);
# one flattened image per row
cropped_image_data = np.empty((process_end-process_start,input_dim[1]*input_dim[2]*input_dim[3]), dtype = np.uint8)

# keep only the label rows that correspond to the selected range
# (explicit copy so later writes never target a view of the full table)
train_label = train_label.iloc[process_start:process_end,:].copy()

# iterate pre-processing for all the images in the directory
#
# status codes written to train_label['Status']:
#   0 nominal (single face, confidence >= min_confidence)
#   1 unreadable image          2 no face detected
#   3 low confidence            4 multiple confident faces
#
# Row i of train_label is paired with train_img_raw_list[process_start+i]
# (the image list is already sorted), so the status is collected by position
# and written back in one assignment instead of chained, label-based
# assignments that silently depend on the index being 0..N-1.
status_codes = np.zeros(process_end-process_start, dtype = np.uint8)
min_confidence = 0.85

for file_idx in tqdm(range(0,process_end-process_start)):

    img_path = train_img_raw_list[process_start+file_idx]
    file_name = os.path.split(img_path)[1]

    # load the image using opencv
    frame = cv2.imread(img_path)

    if frame is None:
        try:
            # use python image library if opencv fails
            with Image.open(img_path) as pil_img:
                frame = np.array(pil_img.convert("RGB"))

        except Exception:
            # if unable to open, categorize as 1 and store a black image
            shutil.copy2(img_path, \
                         os.path.join(train_img_unprocess_path,train_img_unread_path,file_name))
            status_codes[file_idx] = 1
            cropped_image_data[file_idx,:] = 0
            continue

    else:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # cv2 uses BGR -> RGB (will be converted to BGR later)

    # detect the faces in the image
    results = face_detector.detect_faces(frame)

    if len(results) == 0:
        # (add later) another face detection algorithm (Haar Cascade from cv2)
        shutil.copy2(img_path, \
                     os.path.join(train_img_unprocess_path,train_img_undetect_path,file_name))
        status_codes[file_idx] = 2
        cropped_image_data[file_idx,:] = 0
        continue

    confidence_temp = [face['confidence'] for face in results]
    index_max = int(np.argmax(confidence_temp))
    index_temp = [i for i, conf in enumerate(confidence_temp) if conf >= min_confidence]

    if not index_temp:
        # low confidence (single or multiple faces, none reaches min_confidence):
        # keep the most confident face and save it at the suspect folder
        resized_face = cropface(frame, results, index_max)
        cv2.imwrite(os.path.join(train_img_suspect_path,train_img_lowconf_path,file_name), \
                    cv2.cvtColor(resized_face, cv2.COLOR_RGB2BGR))
        status = 3

    elif len(results) == 1:
        # normal case (single face + confidence >= min_confidence):
        # save image at the success folder
        resized_face = cropface(frame, results, 0)
        cv2.imwrite(os.path.join(train_img_process_path,file_name), \
                    cv2.cvtColor(resized_face, cv2.COLOR_RGB2BGR))
        status = 0

    else:
        # multiple faces, at least one confident: save every confident face at
        # the suspect folder as <name>_<k>.jpg and keep the most confident one
        for k in index_temp:
            candidate_face = cropface(frame, results, k)
            file_name_temp = file_name[:-4] + "_" + str(k) + ".jpg"
            cv2.imwrite(os.path.join(train_img_suspect_path,train_img_multiface_path,file_name_temp), \
                        cv2.cvtColor(candidate_face, cv2.COLOR_RGB2BGR))
            if k == index_max:
                resized_face = candidate_face
        status = 4

    # store the status and the image flattened to 1-D
    status_codes[file_idx] = status
    cropped_image_data[file_idx,:] = np.ravel(resized_face).astype(np.uint8)

train_label['Status'] = status_codes
                    
# obtain the embedding vectors for the pretrained feature extraction network,
# then the embedding vectors are compared by using cosine similarity
# (the feature extraction network is already loaded)

# obtain the feature extract output dimension
config_out = feature_extract_net.layers[-1].output
embedding_dim = config_out.shape
output_dtype = config_out.dtype

# set the threshold for the cosine similarity
similar_threshold = np.float32(0.3)

# do this step for only the nominal images, eliminate problematic images.
# Status 1 (unreadable) is excluded as well: those rows are all-zero images
# with no file in the success folder, so they must not reach the similarity step.
problem_mask = train_label['Status'].isin([1, 2, 3, 4]).values

drop_idx = train_label.index[problem_mask].tolist()
drop_idx.sort()

# row positions of the dropped labels inside cropped_image_data
abs_drop_idx = np.flatnonzero(problem_mask).tolist()

# copy the nominal images / labels only
cropped_image_data_post = cropped_image_data[~problem_mask]
train_label_post = train_label.drop(index = drop_idx)

# within each category, flag the images whose average cosine similarity to the
# other images of the same category is at or below similar_threshold (status 5)
for category_code in label_dict.values():

    # labels / row positions of the nominal images that carry this category
    category_mask = (train_label_post['Category'] == category_code).values
    temp_idx = train_label_post.index[category_mask].tolist()
    abs_temp_idx = np.flatnonzero(category_mask)

    # a similarity needs at least two images
    if len(temp_idx) <= 1:
        continue

    # embed the images of this category with the feature extraction network
    img_data_same_type_temp = cropped_image_data_post[abs_temp_idx,:]
    img_data_same_type_temp_feature = feature_extract_net.predict( \
            utils.preprocess_input(img_data_same_type_temp.astype(np.float64). \
                                   reshape((-1,input_dim[1],input_dim[2],input_dim[3])), version = 2))
    img_data_same_type_temp_feature = np.asarray(img_data_same_type_temp_feature, dtype = np.float64)

    # cosine similarity matrix: dot products divided by the product of the norms
    feature_norm = np.linalg.norm(img_data_same_type_temp_feature, axis = 1)
    cos_similarity = np.dot(img_data_same_type_temp_feature, img_data_same_type_temp_feature.T) / \
                     np.outer(feature_norm, feature_norm)

    # ignore diagonal entries
    np.fill_diagonal(cos_similarity, 0)

    # categorize as problematic if images have too low similarity
    avg_cos_similarity = cos_similarity.sum(axis = 1)/(len(cos_similarity)-1)
    low_similarity_idx = np.flatnonzero(avg_cos_similarity <= similar_threshold)

    for move_idx in low_similarity_idx:

        row_label = temp_idx[move_idx]

        # change the status of the images with low similarity
        # (.loc writes into train_label itself, unlike chained assignment)
        train_label.loc[row_label, 'Status'] = 5

        # move image from the nominal folder to the suspect folder
        file_name = train_label.loc[row_label, 'File_Name']
        nominal_file = os.path.join(train_img_process_path,file_name)

        # rows that never produced a file in the success folder (e.g. unreadable
        # images) have nothing to move
        if os.path.exists(nominal_file):
            shutil.move(nominal_file, \
                        os.path.join(train_img_suspect_path,train_img_lowsimilar_path,file_name))

# split the raw cropped images into chunks and save each chunk as a csv file
chunk_size = 10000
num_chunks_raw = len(train_label) // chunk_size + 1

# convert the raw data format to reduce the size (already np.uint8, but check once again)
cropped_image_data = cropped_image_data.astype(dtype = np.uint8)

# np.array_split may return chunks of different length, so the row index of a
# chunk is derived from a running offset rather than from idx*len(chunk)
row_offset = process_start

for idx, chunk in enumerate(np.array_split(cropped_image_data, num_chunks_raw)):
    temp_file_name = 'cropped_image_data_'+str(idx)+'.csv'
    temp_dframe = pd.DataFrame(data = chunk, index = list(range(row_offset, row_offset+len(chunk))))
    temp_dframe.to_csv(os.path.join(train_img_data_path,temp_file_name), header = False)
    row_offset += len(chunk)

# save the labels (including the status column) and the category dictionary
train_label.to_csv(os.path.join(train_img_data_path,'train_label.csv'), header = False)

with open(os.path.join(train_img_data_path,'label_dict.json'), 'w') as f:
    json.dump(label_dict, f, indent = 4)
    
# drop out the problematic data and save the remainder as the training set.
# Every non-zero status is dropped, including status 1 (unreadable): those rows
# hold all-zero placeholder images and must not be used for training.
problem_mask_final = train_label['Status'].isin([1, 2, 3, 4, 5]).values

drop_idx_final = train_label.index[problem_mask_final].tolist()
drop_idx_final.sort()

# row positions of the dropped labels inside cropped_image_data
abs_drop_idx_final = np.flatnonzero(problem_mask_final).tolist()

# the same code path handles "nothing to drop" (boolean mask is all False)
x_train = cropped_image_data[~problem_mask_final]
y_train = train_label.drop(columns = ['Status']).drop(index = drop_idx_final).reset_index(drop = True)

# split the training images into chunks and save each chunk as a csv file
num_chunks_xtrain = len(x_train) // chunk_size + 1

# convert the raw data format to reduce the size (already np.uint8, but check once again)
x_train = x_train.astype(dtype = np.uint8)

# np.array_split may return chunks of different length, so the rows of a chunk
# are located with a running offset rather than with idx*len(chunk)
row_offset = 0

for idx, chunk in enumerate(np.array_split(x_train, num_chunks_xtrain)):
    temp_file_name = 'x_train_'+str(idx)+'.csv'
    temp_dframe = pd.DataFrame(data = chunk, index = y_train.index[row_offset:row_offset+len(chunk)])
    temp_dframe.to_csv(os.path.join(train_img_data_path,temp_file_name), header = False)
    row_offset += len(chunk)

y_train.to_csv(os.path.join(train_img_data_path,'y_train.csv'), header = False)


In [None]:
# the pretrained feature extraction network of VGGFace2 dictates the image size,
# so load it and read the shape / dtype of its input layer
feature_extract_net = VGGFace(model='resnet50', include_top = False, input_shape = (224, 224, 3), pooling = 'avg')

config_in = feature_extract_net.layers[0].input
input_dim = config_in.shape
input_dtype = config_in.dtype

# import *.jpg image list from the selected folder (natural sort order)
test_img_raw_list = natsort.natsorted(glob.glob(os.path.join(test_img_raw_path,"*.jpg")))

# read back the label category dictionary saved by the training pre-processing
with open(os.path.join(train_img_data_path,'label_dict.json')) as json_file:
    label_dict = json.load(json_file)

# largest category code in the dictionary
get_v_label_dict = list(label_dict.values())
max_idx = max(get_v_label_dict)

# detectors: MTCNN first, haar cascade as the second option
face_detector = MTCNN(min_face_size = 30, scale_factor = 0.809)
cascade = cv2.CascadeClassifier(cascade_filename)

# set the indices range to implement the pre-processing
process_start = 0
process_end = len(test_img_raw_list)

# empty label tables to be filled with the identification result:
# one pre-allocated row per test image, plus a growing table for suspect crops
test_label_columns = ['Category','Status','File_Name']
test_label_nominal = pd.DataFrame(columns = test_label_columns, \
                                  index = list(range(process_start,process_end)))
test_label_suspect = pd.DataFrame(columns = test_label_columns)

# numpy arrays to store the cropped images, one flattened image per row
flat_image_size = input_dim[1]*input_dim[2]*input_dim[3]
cropped_image_data = np.empty((process_end-process_start,flat_image_size), dtype = np.uint8)
cropped_image_data_suspect = np.empty((0,flat_image_size), dtype = np.uint8)

# iterate pre-processing for all the images in the directory
#
# status codes written to test_label_nominal['Status']:
#   0 single face detected by MTCNN
#   1 unreadable image (black image stored, category 255)
#   2 no face detected by MTCNN (haar cascade crop or whole frame stored)
#   3 multiple faces, none confident (most confident face stored)
#   4 multiple faces, at least one confident (most confident face stored)
min_confidence = 0.85
unknown_category = max_idx+1   # placeholder category, one past the largest known code
face_size = (input_dim[1],input_dim[2])

# suspect crops are collected in lists and appended once after the loop
# (DataFrame.append / np.vstack inside the loop are quadratic, and
#  DataFrame.append no longer exists in pandas 2)
suspect_records = []
suspect_rows = []

def _set_test_label(row_label, category, status, file_name):
    """Fill one row of test_label_nominal without chained assignment."""
    test_label_nominal.at[row_label, 'Category'] = category
    test_label_nominal.at[row_label, 'Status'] = status
    test_label_nominal.at[row_label, 'File_Name'] = file_name

for file_idx in tqdm(range(0,process_end-process_start)):

    row_label = process_start+file_idx
    img_path = test_img_raw_list[row_label]
    file_name = os.path.split(img_path)[1]

    # load the image using opencv
    frame = cv2.imread(img_path)

    if frame is None:
        try:
            # use python image library if opencv fails
            with Image.open(img_path) as pil_img:
                frame = np.array(pil_img.convert("RGB"))

        except Exception:
            # if unable to open, make it as a black image, categorize it using random guess
            shutil.copy2(img_path, os.path.join(test_img_unprocess_path,file_name))
            _set_test_label(row_label, 255, 1, file_name)
            cropped_image_data[file_idx,:] = 0
            continue

    else:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # cv2 uses BGR -> RGB (will be converted to BGR later)

    # detect the faces in the image
    results = face_detector.detect_faces(frame)

    if len(results) == 0:
        # second option: profile-face haar cascade on the image and on its mirror
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        results_l = cascade.detectMultiScale(gray, scaleFactor = 1.3, minNeighbors = 5, minSize = (30,30))
        flipped = cv2.flip(gray,1)
        results_r = cascade.detectMultiScale(flipped, scaleFactor = 1.3, minNeighbors = 5, minSize = (30,30))

        if len(results_l) > 0:
            x_left, y_top, box_width, box_height = [int(v) for v in results_l[0]]

        elif len(results_r) > 0:
            # the box was found on the mirrored image: mirror x back before cropping
            x_mirror, y_top, box_width, box_height = [int(v) for v in results_r[0]]
            x_left = frame.shape[1]-(x_mirror+box_width)

        else:
            # if no face is detected at all, use the whole image
            x_left, y_top = 0, 0
            box_height, box_width = frame.shape[:2]

        # crop the bounding box and resize it to the input size of VGGFace2 model
        raw_face = frame[y_top:y_top+box_height, x_left:x_left+box_width]
        resized_face = cv2.resize(raw_face, dsize = face_size, interpolation = cv2.INTER_LINEAR)

        # save to the numpy array by converting the image to 1-D data
        cropped_image_data[file_idx,:] = np.ravel(resized_face)
        _set_test_label(row_label, unknown_category, 2, file_name)
        # NOTE(review): the uncropped frame (not resized_face) is written here, as in
        # the original code - confirm this is intended for the "ready" folder
        cv2.imwrite(os.path.join(test_img_process_path,file_name), \
                    cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))

    elif len(results) == 1:
        resized_face = cropface(frame, results, 0)

        # save image at the success folder and set it as nominal image
        cv2.imwrite(os.path.join(test_img_process_path,file_name), \
                    cv2.cvtColor(resized_face, cv2.COLOR_RGB2BGR))
        _set_test_label(row_label, unknown_category, 0, file_name)

        # save to the numpy array by converting the image to 1-D data
        cropped_image_data[file_idx,:] = np.ravel(resized_face).astype(np.uint8)

    else:
        # multiple faces: every confident face is kept as a suspect candidate
        confidence_temp = [face['confidence'] for face in results]
        index_temp = [i for i, conf in enumerate(confidence_temp) if conf >= min_confidence]
        index_max = int(np.argmax(confidence_temp))

        for k in index_temp:
            candidate_face = cropface(frame, results, k)

            # save image at the suspect folder as <name>_<k>.jpg
            file_name_temp = file_name[:-4] + "_" + str(k) + ".jpg"
            cv2.imwrite(os.path.join(test_img_suspect_path, file_name_temp), \
                        cv2.cvtColor(candidate_face, cv2.COLOR_RGB2BGR))

            suspect_records.append({'Category': unknown_category, 'Status': 4, 'File_Name': file_name})
            suspect_rows.append(np.ravel(candidate_face).astype(np.uint8))

        # the most confident face represents the image; when no face reaches
        # min_confidence it is still used (status 3) so that the row is never
        # left uninitialised
        resized_face = cropface(frame, results, index_max)
        cv2.imwrite(os.path.join(test_img_process_path,file_name), \
                    cv2.cvtColor(resized_face, cv2.COLOR_RGB2BGR))
        _set_test_label(row_label, unknown_category, 4 if index_temp else 3, file_name)

        # save to the numpy array by converting the image to 1-D data
        cropped_image_data[file_idx,:] = np.ravel(resized_face).astype(np.uint8)

# append the collected suspect crops in one step
if suspect_records:
    new_suspect_label = pd.DataFrame(suspect_records, columns = ['Category','Status','File_Name'])
    if len(test_label_suspect) == 0:
        test_label_suspect = new_suspect_label
    else:
        test_label_suspect = pd.concat([test_label_suspect, new_suspect_label], ignore_index = True)
    cropped_image_data_suspect = np.vstack([cropped_image_data_suspect] + suspect_rows)
                    
# convert the raw data format to reduce the size
cropped_image_data = cropped_image_data.astype(dtype = 'uint8')
test_label_nominal = test_label_nominal.astype({'Category':np.uint8, 'Status':np.uint8})

# number of label rows used to decide how many csv files are written
chunk_size = 5000


def _save_image_chunks(image_data, num_chunks, file_prefix, first_row_id):
    """Split `image_data` row-wise and write every piece to its own csv file.

    Parameters
    ----------
    image_data : np.ndarray
        2-D array holding one flattened image per row.
    num_chunks : int
        Number of pieces requested from np.array_split.
    file_prefix : str
        Prefix of the csv file name; the chunk number and '.csv' are appended.
    first_row_id : int
        Row id written in front of the first row of the first chunk.
    """
    # np.array_split returns pieces of different lengths when the rows cannot be
    # divided evenly, so the row ids follow a running offset; idx*len(chunk)
    # would repeat ids for the shorter trailing pieces
    row_offset = first_row_id
    for idx, chunk in enumerate(np.array_split(image_data, num_chunks)):
        temp_file_name = file_prefix+str(idx)+'.csv'
        temp_dframe = pd.DataFrame(data = chunk, index = range(row_offset, row_offset+len(chunk)))
        temp_dframe.to_csv(os.path.join(test_img_data_path,temp_file_name), header = False)
        row_offset += len(chunk)


# nominal crops: one csv file per chunk plus the label table
num_chunks_raw = len(test_label_nominal) // chunk_size + 1
_save_image_chunks(cropped_image_data, num_chunks_raw, 'cropped_image_data_nominal_', process_start)
test_label_nominal.to_csv(os.path.join(test_img_data_path,'test_label_nominal_empty.csv'), header = False)

# suspect crops (multiple faces found in one image): same layout
num_chunks_suspect = len(test_label_suspect) // chunk_size + 1
_save_image_chunks(cropped_image_data_suspect, num_chunks_suspect, 'cropped_image_data_suspect_', process_start)
test_label_suspect.to_csv(os.path.join(test_img_data_path,'test_label_suspect_empty.csv'), header = False)


### Learning
build the model (preprocessing layers + frozen VGGFace base + fc layers) \
load the training data and split it into training/validation sets \
train the model and save it



In [None]:
# start from a clean Keras session so that re-running this cell does not leak memory
K.clear_session()

# VGGFace ResNet-50 without its classification top (feature extraction only);
# the weights stay fixed during training
pretrained_base = VGGFace(model = 'resnet50', include_top = False, input_shape = (224, 224, 3), pooling = 'avg')
pretrained_base.trainable = False

# random augmentation layers placed in front of the base network
preprocessing = Sequential(
    [
        RandomTranslation(height_factor = 0.35, width_factor = 0.35, input_shape = (224, 224, 3)),
        RandomRotation(factor = 0.166),
        RandomFlip(mode = "horizontal"),
        RandomZoom(height_factor = 0.4, width_factor = 0.4),
        RandomContrast(factor = 0.3),
    ],
    name = 'preprocessing')

# fully connected classifier with a 100-way softmax output
fc = Sequential(
    [
        Flatten(),
        Dense(1024, activation = 'relu'),
        Dropout(0.2),
        Dense(100, activation = 'softmax'),
    ],
    name = 'fc')

# full network: augmentation -> pretrained feature extraction -> classifier
model = Sequential([preprocessing, pretrained_base, fc])

# learning rate scheduler: staircase exponential decay (x0.90 every 500 steps)
# alternatives kept for reference:
#   CosineDecayRestarts(initial_learning_rate = 5e-4, first_decay_steps = 1000, t_mul = 2.0, m_mul = 1.0, alpha = 0.01)
#   CosineDecay(initial_learning_rate = 5e-4, decay_steps = 10000, alpha = 0.02)
lr_schedule = ExponentialDecay(
    initial_learning_rate = 1e-3,
    decay_steps = 500,
    decay_rate = 0.90,
    staircase = True)
lr_scheduler = lr_schedule

# optimizer (alternative: SGD(learning_rate = lr_scheduler, momentum = 0.9))
opt_alg = Adam(learning_rate = lr_scheduler)

# configure the learning process; the labels are expected one-hot encoded
model.compile(
    optimizer = opt_alg,
    loss = CategoricalCrossentropy(),
    metrics = ['categorical_accuracy'])

model.summary()

In [None]:
# obtain the network input dimension (batch, height, width, channels)
config_in = model.layers[0].input
input_dim = config_in.shape
pixels_per_image = input_dim[1]*input_dim[2]*input_dim[3]

# read the saved data in csv format
# using datatable package
y_pre_dt = dt.fread(os.path.join(train_img_data_path,'y_train.csv'), encoding = 'utf-8')
y_pre = y_pre_dt['C2'].to_numpy()

x_pre_raw_list = natsort.natsorted(glob.glob(os.path.join(train_img_data_path,"x_train_*.csv")))
x_pre = np.empty((len(y_pre),pixels_per_image), dtype = np.uint8)

temp_ridx = 0
for file in x_pre_raw_list:
    x_pre_temp = dt.fread(file, encoding = 'utf-8').to_numpy()
    if len(x_pre_temp) == 0:
        # nothing to copy from a chunk without rows
        continue
    # column 0 is the photo index stored with the csv; only the pixel columns are kept,
    # which also avoids squeezing the index into the uint8 buffer
    x_pre[temp_ridx:temp_ridx+len(x_pre_temp),:] = x_pre_temp[:,1:]
    temp_ridx = temp_ridx+len(x_pre_temp)

# check the data integrity: every pre-allocated row must have been filled from the csv files
# (comparing x_pre.shape[0] with y_pre.shape[0] can never fail because x_pre is sized from y_pre)
if temp_ridx != len(y_pre):
    raise ValueError('x_train_*.csv files hold {} rows but y_train.csv holds {} labels' \
                     .format(temp_ridx, len(y_pre)))


def _photo_id(file_name):
    """Return the integer photo id encoded in an image file name (extension and folder removed)."""
    return int(os.path.splitext(os.path.basename(file_name))[0])


# check the data integrity (shuffled index check): the photo ids of the processed images
# must match the file names stored in y_train.csv, in the same order
train_img_process_list = natsort.natsorted(glob.glob(os.path.join(train_img_process_path,"*.jpg")))
train_img_files_list = [_photo_id(file) for file in train_img_process_list]

integrity_check_list = [_photo_id(file_name) for file_name in y_pre_dt['C1'].to_list()[0]]

if (integrity_check_list != train_img_files_list):
    raise ValueError('file names in y_train.csv do not match the images in the processed training folder')

# shuffle and split; the fixed seed keeps the validation set identical between runs
[x_train, x_valid, y_train, y_valid] = train_test_split(x_pre, y_pre, test_size = 0.10, shuffle = True,
                                                        random_state = 42)

# reshape for the training process
x_train = x_train.reshape((-1,input_dim[1],input_dim[2],input_dim[3])).astype(np.float32)
x_valid = x_valid.reshape((-1,input_dim[1],input_dim[2],input_dim[3])).astype(np.float32)

# one-hot-encoding (wrong accuracy computation in sparse categorical crossentropy)
# the encoder is fitted on all labels: fitting on y_train alone drops the column of any class
# that the random split left out of the training set and makes transform(y_valid) raise on it
# NOTE(review): scikit-learn >= 1.2 renames `sparse` to `sparse_output` - adjust to the installed version
onehot = OneHotEncoder(sparse = False)
onehot.fit(y_pre)
y_train = onehot.transform(y_train)
y_valid = onehot.transform(y_valid)


In [None]:
# note: RGB to BGR conversion is automatically implemented by utils.preprocess_input
# vggface_resnet50 uses BGR type format as input; cv2 already read the image in BGR (pyplot adopts RGB format)
# NOTE(review): x_train / x_valid are overwritten here, so this cell is not idempotent -
# re-run the data loading cell before running this cell a second time
x_train = utils.preprocess_input(x_train, version = 2)
x_valid = utils.preprocess_input(x_valid, version = 2)

# wrap the arrays into batched tf.data pipelines (the last, smaller batch is kept)
train_tensor = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_tensor = train_tensor.batch(batch_size = 32, drop_remainder = False, num_parallel_calls = None)
validation_tensor = tf.data.Dataset.from_tensor_slices((x_valid, y_valid))
validation_tensor = validation_tensor.batch(batch_size = 32, drop_remainder = False, num_parallel_calls = None)

# after every epoch, store the weights only if the validation accuracy improved
md_check_callback = tf.keras.callbacks.ModelCheckpoint(filepath = training_results_path+training_results_ckpt_path,
                                                    monitor = 'val_categorical_accuracy',
                                                    verbose = 1,
                                                    save_best_only = True,
                                                    save_weights_only = True,
                                                    mode = 'auto',
                                                    save_freq = 'epoch',
                                                    initial_value_threshold = None)

# `callbacks` is documented as a list of callback instances
history = model.fit(train_tensor, epochs = 15, validation_data = validation_tensor,
                    callbacks = [md_check_callback])
# the model saved here is the state after the last epoch; the best weights are in the checkpoint above
model.save(training_results_path+training_model_save_path)

### Evaluation
load the trained model and the pre-processed test dataset \
predict a label for every test image \
create the submission labels

In [None]:
# load the trained model saved by the training cell
# fail right away when it is missing; otherwise `trained_model` would be undefined
# (or silently left over from an earlier run of this kernel)
if not os.path.isdir(training_results_path+training_model_save_path):
    raise FileNotFoundError('trained model folder not found: ' + training_results_path+training_model_save_path)
trained_model = keras.models.load_model(training_results_path+training_model_save_path[:-1])

# pop up the pre-processing layer: keep only layer 1 and layer 2 of the trained model,
# i.e. everything after the random augmentation layers
model = Sequential()
model.add(trained_model.layers[1])
model.add(trained_model.layers[2])
model.summary()

# obtain the network input dimension (batch, height, width, channels)
config_in = model.layers[0].input
input_dim = config_in.shape

# read the saved data in csv format
# using datatable package
test_label_nominal_empty = dt.fread(os.path.join(test_img_data_path,'test_label_nominal_empty.csv'), \
                                    encoding = 'utf-8')
test_label_nominal_empty = test_label_nominal_empty.to_pandas()
test_label_suspect_empty = dt.fread(os.path.join(test_img_data_path,'test_label_suspect_empty.csv'), \
                                    encoding = 'utf-8')
test_label_suspect_empty = test_label_suspect_empty.to_pandas()


def _load_cropped_image_data(file_pattern, num_rows):
    """Read the chunked image csv files matching `file_pattern` into one float32 array.

    Parameters
    ----------
    file_pattern : str
        Glob pattern (relative to test_img_data_path) of the chunk files.
    num_rows : int
        Number of images expected, i.e. the number of rows of the matching label table.

    Returns
    -------
    (list, np.ndarray)
        The naturally sorted file list and an array of shape (num_rows, pixels per image).

    Raises
    ------
    ValueError
        If the files do not provide exactly `num_rows` rows.
    """
    file_list = natsort.natsorted(glob.glob(os.path.join(test_img_data_path, file_pattern)))
    image_data = np.empty((num_rows,input_dim[1]*input_dim[2]*input_dim[3]), dtype = np.float32)

    temp_ridx = 0
    for file in file_list:
        chunk = dt.fread(file, encoding = 'utf-8').to_numpy()
        if len(chunk) == 0:
            # a chunk without rows has nothing to copy
            continue
        # column 0 is the row id written with the csv; the remaining columns are the pixels
        image_data[temp_ridx:temp_ridx+len(chunk),:] = chunk[:,1:]
        temp_ridx = temp_ridx+len(chunk)

    # np.empty leaves unfilled rows uninitialised, so a short read must not pass silently
    if temp_ridx != num_rows:
        raise ValueError('{} provides {} rows but {} were expected'.format(file_pattern, temp_ridx, num_rows))

    return file_list, image_data


[cropped_image_data_nominal_list, cropped_image_data_nominal] = \
_load_cropped_image_data("cropped_image_data_nominal_*.csv", len(test_label_nominal_empty))

[cropped_image_data_suspect_list, cropped_image_data_suspect] = \
_load_cropped_image_data("cropped_image_data_suspect_*.csv", len(test_label_suspect_empty))

# add probability measure for the multiple faces cases
test_label_suspect_empty['Prob'] = np.float32(0.0)


def _predict_face(flat_image):
    """Return the output of `model` for one flattened face crop."""
    temp_reformat = flat_image.reshape((-1,input_dim[1],input_dim[2],input_dim[3]))
    return model.predict(utils.preprocess_input(temp_reformat, version = 2))


# make prediction using the trained model
# columns of the label tables as read from csv: C1 = Category, C2 = Status, C3 = File_Name
# values are written with .at[row, column]; chained indexing (df['C1'][idx] = ...) may
# assign to a temporary copy instead of the table itself
for idx in tqdm(range(len(test_label_nominal_empty))):
    status = test_label_nominal_empty.at[idx, 'C2']

    if status == 0 or status == 2:
        # a single face crop is stored for this image (status 0 = exactly one face detected;
        # NOTE(review): status 2 is assigned outside this cell - confirm its meaning)
        temp_result = _predict_face(cropped_image_data_nominal[idx,:])
        test_label_nominal_empty.at[idx, 'C1'] = np.argmax(temp_result)

    elif status == 1:
        # random guess (NOTE(review): presumably no usable face crop for this image - confirm)
        test_label_nominal_empty.at[idx, 'C1'] = random.randrange(0,100)

    else:
        # remaining status values (4 = several faces detected): classify every suspect crop
        # of this file and keep the most confident prediction
        suspect_idx_temp = test_label_suspect_empty.index[test_label_suspect_empty['C3'] == \
                                                          test_label_nominal_empty.at[idx, 'C3']]
        if len(suspect_idx_temp) == 0:
            raise ValueError('no suspect crops stored for ' + str(test_label_nominal_empty.at[idx, 'C3']))

        # integrity check: the nominal crop has to be identical to the first suspect crop
        # (an element-wise comparison; summing the differences can cancel out to zero)
        if not np.array_equal(cropped_image_data_nominal[idx,:], cropped_image_data_suspect[suspect_idx_temp[0],:]):
            raise ValueError('nominal and suspect image data differ for ' + \
                             str(test_label_nominal_empty.at[idx, 'C3']))

        for sub_idx in suspect_idx_temp:
            temp_result = _predict_face(cropped_image_data_suspect[sub_idx,:])
            test_label_suspect_empty.at[sub_idx, 'C1'] = np.argmax(temp_result)
            test_label_suspect_empty.at[sub_idx, 'Prob'] = np.max(temp_result)

        # position of the most confident crop; not called `max_idx` because the
        # pre-processing cell reads a global of that name
        best_pos = np.argmax(test_label_suspect_empty.loc[suspect_idx_temp, 'Prob'].to_numpy())
        test_label_nominal_empty.at[idx, 'C1'] = test_label_suspect_empty.at[suspect_idx_temp[best_pos], 'C1']

# store the labeled tables; the row-id column read back from the csv (C0) is dropped first
test_label_nominal_empty = test_label_nominal_empty.drop(columns = ['C0'])
test_label_suspect_empty = test_label_suspect_empty.drop(columns = ['C0'])
for label_frame, label_file in ((test_label_nominal_empty, 'test_label_nominal.csv'),
                                (test_label_suspect_empty, 'test_label_suspect.csv')):
    label_frame.to_csv(os.path.join(test_img_data_path, label_file), header = False)

# submission table: one predicted category per test image, indexed like the nominal label table
submission_label = pd.DataFrame({'Category': test_label_nominal_empty['C1'].to_numpy()},
                                index = test_label_nominal_empty.index)

# label_dict.json is read as a series and inverted so that its values become the lookup keys
label_rev_type = pd.read_json(os.path.join(train_img_data_path,'label_dict.json'), typ = 'series')
label_rev_dict = {value: key for key, value in label_rev_type.to_dict().items()}

# replace the numerical category with the mapped entry and name the index column
submission_label['Category'] = submission_label['Category'].map(label_rev_dict)
submission_label.index.name = 'Id'

submission_label.to_csv(os.path.join(test_label_path,'submission_label.csv'), header = True)
