# Required imports

In [2]:
%load_ext autoreload
%autoreload 2

import joblib
import os
from sklearn.model_selection import train_test_split
from FaceBinaryDataset import FaceBinaryDataset
from build_pipeline import build_pipeline
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV
import numpy as np
from sklearn.metrics import precision_score, recall_score
import time
import pandas as pd
import cv2
from PIL import ImageEnhance
import warnings
import matplotlib.pyplot as plt
import seaborn as sns

from metrics import evaluate_model,measure_prediction_time
from perturbations import test_model_on_perturbations



# Load Dataset

In [3]:
dataset = FaceBinaryDataset("dataset/output_dataset", dimension=160)

# Walk the dataset once and keep (image, label) pairs, dropping every sample
# for which the dataset returned no image.
samples = []
for i in range(len(dataset)):
    img, label, userid = dataset[i]
    if img is not None:
        samples.append((img, label))

# Split the kept pairs back into two parallel arrays.
images = np.array([pair[0] for pair in samples])
labels = np.array([pair[1] for pair in samples])

No face detected in: dataset/output_dataset/authorized/user_00041/img_015.jpg
No face detected in: dataset/output_dataset/authorized/user_00041/img_013.jpg
No face detected in: dataset/output_dataset/authorized/user_00041/img_008.jpg
No face detected in: dataset/output_dataset/authorized/user_00079/img_003.jpg
No face detected in: dataset/output_dataset/authorized/user_00084/img_003.jpg
No face detected in: dataset/output_dataset/authorized/user_00084/img_011.jpg
No face detected in: dataset/output_dataset/authorized/user_00084/img_020.jpg
No face detected in: dataset/output_dataset/authorized/user_00070/img_020.jpg
No face detected in: dataset/output_dataset/authorized/user_00070/img_019.jpg
No face detected in: dataset/output_dataset/authorized/user_00077/img_003.jpg
No face detected in: dataset/output_dataset/authorized/user_00077/img_006.jpg
No face detected in: dataset/output_dataset/authorized/user_00083/img_012.jpg
No face detected in: dataset/output_dataset/authorized/user_0008

# Outer cross validation

In [5]:
# Fixed seed so the cross-validation splits are reproducible between runs.
seed = 42

# Outer evaluation loop: a single repetition of a stratified 2-fold split.
cv_outer = RepeatedStratifiedKFold(
    n_splits=2,
    n_repeats=1,
    random_state=seed,
)

# LBP

In [6]:
# Search space: LBP extractor settings crossed with the classifier's kernel and C.
param_grid = {
    'features__extractor__num_points': [8, 16, 24],
    'features__extractor__radius'    : [1, 2, 8],
    'classifier__kernel': ['linear', 'rbf'],
    'classifier__C': [0.1, 1, 100]
}

# One entry per outer fold is appended to each of these accumulators.
LBP_far_to_statistic, LBP_frr_to_statistic = [], []
LBP_precision_to_statistic, LBP_recall_to_statistic = [], []
LBP_ptr_results_to_statistic, LBP_time_results_to_statistic = [], []

for fold_idx, (train_idx, test_idx) in enumerate(cv_outer.split(images, labels), 1):
    X_train, y_train = images[train_idx], labels[train_idx]
    X_test, y_test = images[test_idx], labels[test_idx]

    # Inner loop: grid search over a fresh "lbp" pipeline on the training part only.
    grid_search_SVC = GridSearchCV(
        build_pipeline("lbp"),
        param_grid=param_grid,
        cv=RepeatedStratifiedKFold(n_splits=2, n_repeats=1, random_state=seed),
        verbose=2,
        n_jobs=-1,
    )
    best_model = grid_search_SVC.fit(X_train, y_train).best_estimator_

    # Score the selected model on the held-out outer fold.
    far, frr, precision, recall = evaluate_model(best_model, X_test, y_test)
    ptr_results = test_model_on_perturbations(best_model, X_test, y_test)
    time_results = measure_prediction_time(best_model, X_test)

    LBP_far_to_statistic.append(far)
    LBP_frr_to_statistic.append(frr)
    LBP_precision_to_statistic.append(precision)
    LBP_recall_to_statistic.append(recall)
    LBP_time_results_to_statistic.append(time_results)
    LBP_ptr_results_to_statistic.append(ptr_results)

    print(f"Fold {fold_idx}: precision = {precision:.4f}")

Fitting 2 folds for each of 54 candidates, totalling 108 fits




Fold 1: accuracy = 0.5461
Fitting 2 folds for each of 54 candidates, totalling 108 fits


KeyboardInterrupt: 

# Saving results

In [11]:
# Headline metrics: one row per outer-CV fold.
df_general = pd.DataFrame({
    'FAR': LBP_far_to_statistic,
    'FRR': LBP_frr_to_statistic,
    'Precision': LBP_precision_to_statistic,
    'Recall': LBP_recall_to_statistic,
    'Time': LBP_time_results_to_statistic
})
df_general.to_csv("LBP_general_statistics.csv", index=False)

# Perturbation results: writing each fold to the same path inside a loop kept
# only the last fold on disk. Stack all folds instead, tagging every row with
# its 1-based fold number, and write the file once.
fold_frames = [
    pd.DataFrame(fold).assign(fold=fold_number)
    for fold_number, fold in enumerate(LBP_ptr_results_to_statistic, 1)
]
if fold_frames:  # pd.concat raises on an empty list; nothing to save in that case
    df_perturbations = pd.concat(fold_frames, ignore_index=True)
    df_perturbations.to_csv("LBP_perturbation_statistics.csv", index=False)

# HOG

In [None]:
# Search space: HOG extractor settings crossed with the classifier's kernel and C.
param_grid = {
    'features__extractor__orientations': [8, 12],
    'features__extractor__pixels_per_cell': [(4, 4), (8, 8)],
    'features__extractor__cells_per_block': [(2, 2), (3, 3)],

    'classifier__kernel': ['linear', 'rbf'],
    'classifier__C': [0.1, 1, 100]
}

# Per-fold accumulators (one appended entry per outer fold).
HOG_far_to_statistic = []
HOG_frr_to_statistic = []
HOG_precision_to_statistic = []
HOG_recall_to_statistic = []
HOG_ptr_results_to_statistic = []
HOG_time_results_to_statistic = []

for fold_idx, (train_idx, test_idx) in enumerate(cv_outer.split(images, labels), 1):
    X_train, X_test = images[train_idx], images[test_idx]
    y_train, y_test = labels[train_idx], labels[test_idx]

    # Inner model selection on the training part of this outer fold.
    grid_search_SVC = GridSearchCV(
        build_pipeline("hog"),
        param_grid=param_grid,
        cv=RepeatedStratifiedKFold(n_splits=3, n_repeats=1, random_state=seed),
        verbose=2,
        n_jobs=-1,
    )
    grid_search_SVC.fit(X_train, y_train)
    best_model = grid_search_SVC.best_estimator_

    # Clean-data metrics on the held-out part.
    far, frr, precision, recall = evaluate_model(best_model, X_test, y_test)
    for bucket, value in (
        (HOG_far_to_statistic, far),
        (HOG_frr_to_statistic, frr),
        (HOG_precision_to_statistic, precision),
        (HOG_recall_to_statistic, recall),
    ):
        bucket.append(value)

    # Perturbation and timing measurements for the same model.
    ptr_results = test_model_on_perturbations(best_model, X_test, y_test)
    time_results = measure_prediction_time(best_model, X_test)
    HOG_time_results_to_statistic.append(time_results)
    HOG_ptr_results_to_statistic.append(ptr_results)

    print(f"Fold {fold_idx}: precision = {precision:.4f}")

Fitting 3 folds for each of 48 candidates, totalling 144 fits




# Saving results

In [None]:
# Headline metrics: one row per outer-CV fold.
df_general = pd.DataFrame({
    'FAR': HOG_far_to_statistic,
    'FRR': HOG_frr_to_statistic,
    'Precision': HOG_precision_to_statistic,
    'Recall': HOG_recall_to_statistic,
    'Time': HOG_time_results_to_statistic
})
df_general.to_csv("HOG_general_statistics.csv", index=False)

# Perturbation results: writing each fold to the same path inside a loop kept
# only the last fold on disk. Stack all folds instead, tagging every row with
# its 1-based fold number, and write the file once.
fold_frames = [
    pd.DataFrame(fold).assign(fold=fold_number)
    for fold_number, fold in enumerate(HOG_ptr_results_to_statistic, 1)
]
if fold_frames:  # pd.concat raises on an empty list; nothing to save in that case
    df_perturbations = pd.concat(fold_frames, ignore_index=True)
    df_perturbations.to_csv("HOG_perturbation_statistics.csv", index=False)

# CNN

In [None]:
# Only the classifier is tuned here; no extractor parameters are in the grid.
param_grid = {
    'classifier__kernel': ['linear', 'rbf'],
    'classifier__C': [0.1, 1, 100]
}

# Per-fold accumulators (one appended entry per outer fold).
CNN_far_to_statistic, CNN_frr_to_statistic = [], []
CNN_precision_to_statistic, CNN_recall_to_statistic = [], []
CNN_ptr_results_to_statistic, CNN_time_results_to_statistic = [], []

for fold_idx, (train_idx, test_idx) in enumerate(cv_outer.split(images, labels), 1):
    X_train, y_train = images[train_idx], labels[train_idx]
    X_test, y_test = images[test_idx], labels[test_idx]

    # Inner 3-fold grid search over a fresh "cnn" pipeline.
    pipeline = build_pipeline("cnn")
    cv_inner = RepeatedStratifiedKFold(n_splits=3, n_repeats=1, random_state=seed)
    grid_search_SVC = GridSearchCV(
        pipeline, param_grid=param_grid, cv=cv_inner, verbose=2, n_jobs=-1
    )
    best_model = grid_search_SVC.fit(X_train, y_train).best_estimator_

    # Evaluate the selected model on the held-out outer fold.
    far, frr, precision, recall = evaluate_model(best_model, X_test, y_test)
    ptr_results = test_model_on_perturbations(best_model, X_test, y_test)
    time_results = measure_prediction_time(best_model, X_test)

    CNN_far_to_statistic.append(far)
    CNN_frr_to_statistic.append(frr)
    CNN_precision_to_statistic.append(precision)
    CNN_recall_to_statistic.append(recall)
    CNN_time_results_to_statistic.append(time_results)
    CNN_ptr_results_to_statistic.append(ptr_results)

    print(f"Fold {fold_idx}: precision = {precision:.4f}")

# Save results

In [None]:
# Headline metrics: one row per outer-CV fold.
df_general = pd.DataFrame({
    'FAR': CNN_far_to_statistic,
    'FRR': CNN_frr_to_statistic,
    'Precision': CNN_precision_to_statistic,
    'Recall': CNN_recall_to_statistic,
    'Time': CNN_time_results_to_statistic
})
df_general.to_csv("CNN_general_statistics.csv", index=False)

# Perturbation results: writing each fold to the same path inside a loop kept
# only the last fold on disk. Stack all folds instead, tagging every row with
# its 1-based fold number, and write the file once.
fold_frames = [
    pd.DataFrame(fold).assign(fold=fold_number)
    for fold_number, fold in enumerate(CNN_ptr_results_to_statistic, 1)
]
if fold_frames:  # pd.concat raises on an empty list; nothing to save in that case
    df_perturbations = pd.concat(fold_frames, ignore_index=True)
    df_perturbations.to_csv("CNN_perturbation_statistics.csv", index=False)

# FaceNet

In [None]:
# Only the classifier is tuned here; no extractor parameters are in the grid.
param_grid = {
    'classifier__kernel': ['linear', 'rbf'],
    'classifier__C': [0.1, 1, 100]
}

# Per-fold accumulators (one appended entry per outer fold).
FN_far_to_statistic=[]
FN_frr_to_statistic=[]
FN_precision_to_statistic=[]
FN_recall_to_statistic=[]

FN_ptr_results_to_statistic=[]
FN_time_results_to_statistic=[]

for fold_idx, (train_idx, test_idx) in enumerate(cv_outer.split(images, labels), 1):
    X_train, X_test = images[train_idx], images[test_idx]
    y_train, y_test = labels[train_idx], labels[test_idx]

    pipeline = build_pipeline("facenet")

    # Inner 3-fold split used for model selection on the training part only.
    cv_inner = RepeatedStratifiedKFold(n_splits=3, n_repeats=1, random_state=seed)

    grid_search_SVC = GridSearchCV(
        pipeline,
        param_grid=param_grid,
        cv=cv_inner,
        verbose=2,
        n_jobs=-1,
    )

    grid_search_SVC.fit(X_train, y_train)

    best_model = grid_search_SVC.best_estimator_

    # Clean-data metrics on the held-out outer fold.
    far, frr, precision, recall = evaluate_model(best_model, X_test, y_test)

    FN_far_to_statistic.append(far)
    FN_frr_to_statistic.append(frr)
    FN_precision_to_statistic.append(precision)
    FN_recall_to_statistic.append(recall)

    # Run the perturbation suite exactly once and keep its result as a single
    # object, matching the LBP/HOG/CNN cells. An earlier extra call unpacked the
    # result into four unused names, which doubled the cost of this step.
    ptr_results = test_model_on_perturbations(best_model, X_test, y_test)
    time_results = measure_prediction_time(best_model, X_test)

    FN_time_results_to_statistic.append(time_results)
    FN_ptr_results_to_statistic.append(ptr_results)


    print(f"Fold {fold_idx}: precision = {precision:.4f}")

# Save results

In [None]:
# Headline metrics: one row per outer-CV fold.
df_general = pd.DataFrame({
    'FAR': FN_far_to_statistic,
    'FRR': FN_frr_to_statistic,
    'Precision': FN_precision_to_statistic,
    'Recall': FN_recall_to_statistic,
    'Time': FN_time_results_to_statistic
})
df_general.to_csv("FN_general_statistics.csv", index=False)

# Perturbation results: writing each fold to the same path inside a loop kept
# only the last fold on disk. Stack all folds instead, tagging every row with
# its 1-based fold number, and write the file once.
fold_frames = [
    pd.DataFrame(fold).assign(fold=fold_number)
    for fold_number, fold in enumerate(FN_ptr_results_to_statistic, 1)
]
if fold_frames:  # pd.concat raises on an empty list; nothing to save in that case
    df_perturbations = pd.concat(fold_frames, ignore_index=True)
    df_perturbations.to_csv("FN_perturbation_statistics.csv", index=False)