Imports

In [None]:
import cv2
import numpy as np
import os
from time import time
import random
import pandas as pd

from random import seed
from random import randrange

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from scipy.cluster.vq import kmeans, vq
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import KFold,StratifiedKFold
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process.kernels import DotProduct
from sklearn.gaussian_process.kernels import Matern
from sklearn.gaussian_process.kernels import RationalQuadratic
from sklearn.gaussian_process.kernels import WhiteKernel

In [None]:
from functools import partial
from hyperopt import hp, fmin, tpe, Trials
from hyperopt.pyll.base import scope
import optuna
from sklearn.metrics import accuracy_score

In [None]:
# Root folder of the dataset: it is expected to contain one sub-folder per person.
# Set the LFW_DIR environment variable to point the notebook at another location;
# the original hard-coded path is kept as the fallback.
train_path = os.environ.get('LFW_DIR', 'C:/Users/Thomas/Documents/Datasets/lfw(250x250)/')
# Names of all entries found directly under the dataset root.
training_names = os.listdir(train_path)

Image import for parameter tuning

In [None]:
# Number of cross-validation folds; the fold containers below are sized from it.
kfol=5
#in these arrays the different folds are saved
face_data=[[] for _ in range(kfol)]    # face_data[i]  -> image paths of fold i
label_data=[[] for _ in range(kfol)]   # label_data[i] -> class ids of fold i

print("Reading Images...")
t0 = time()
min_faces=30           # minimum number of images a person needs to be kept
target_names=[]        # folder name of every kept person, in class-id order
total_photos_seen=0    # number of images belonging to kept persons
n_classes=0            # running class id; the first kept person gets id 1
# os.listdir returns entries in arbitrary order, so sort to make the class ids
# and the content of each fold reproducible between runs and machines.
folders = sorted(os.listdir(train_path))
for folder in folders:
    label = os.path.basename(folder)
    training_images_path = os.path.join(train_path, folder)
    if not os.path.isdir(training_images_path):
        continue   # ignore stray files lying next to the person folders
    images = sorted(os.listdir(training_images_path))
    num_of_faces = len(images)
    if num_of_faces>=min_faces:   #people with low number of faces are skipped
        n_classes=n_classes+1
        target_names.append(label)
        faces=[os.path.join(training_images_path, image) for image in images]
        labels=[n_classes]*num_of_faces
        total_photos_seen=total_photos_seen+num_of_faces
        # Re-seeding for every person gives each person the same random sequence.
        seed(1)
        # NOTE(review): cross_validation_split is not defined in the cells shown here,
        # so it must be defined before this cell runs. It presumably returns kfol
        # aligned folds of paths and labels drawn with the `random` module - confirm.
        face_folds,label_folds = cross_validation_split(faces,labels, kfol) #here the different folds are created
        for i in range(0,kfol):
            face_data[i].extend(face_folds[i])
            label_data[i].extend(label_folds[i])
print("done in %0.3fs" % (time() - t0))

SVC

In [None]:
def optimize_svc(trial,x,y):
    """Optuna objective: cross-validated accuracy of an SVC on ORB bag-of-visual-words features.

    Parameters
    ----------
    trial : optuna trial
        Used to sample ``C``, ``gamma``, ``kernel`` and ``degree``.
    x : list of list of str
        One list of image file paths per fold.
    y : list of list of int
        One list of integer class labels per fold, aligned with ``x``.

    Returns
    -------
    float
        Negative mean accuracy over all folds, so that a minimising study
        maximises accuracy.

    Raises
    ------
    ValueError
        If a CV round has no training or no test image with ORB descriptors.
    """
    svc__C=trial.suggest_uniform("C",0.001,100)
    svc__gamma=trial.suggest_uniform("gamma",0.0001,100)
    svc__kernel=trial.suggest_categorical("kernel",["rbf","linear", "poly","sigmoid"])
    svc__degree=trial.suggest_int("degree",1,6)
    model=SVC(class_weight='balanced',
           C=svc__C,
           gamma=svc__gamma,
           kernel=svc__kernel,
           degree=svc__degree)

    kfol=len(x)   # x and y hold one list per fold
    k = 200       # number of visual words requested from k-means
    orb=cv2.ORB_create()

    # ORB descriptors do not depend on the train/test split, so every image is
    # read and described once per trial instead of once per CV round.
    print("Finding descriptors for ",sum(len(fold) for fold in x)," images")
    t0 = time()
    fold_des=[]
    fold_classes=[]
    for fold_paths,fold_labels in zip(x,y):
        des_kept=[]
        classes_kept=[]
        for image_path,image_class in zip(fold_paths,fold_labels):
            im = cv2.imread(image_path)
            # cv2.imread returns None for unreadable files; treat them like
            # images for which ORB yields no descriptors and drop them.
            des = None if im is None else orb.detectAndCompute(im, None)[1]
            if des is not None:
                des_kept.append(des)
                classes_kept.append(image_class)
        fold_des.append(des_kept)
        fold_classes.append(classes_kept)
    print("done in %0.3fs" % (time() - t0))
    print("Found descriptors for ",sum(len(d) for d in fold_des)," images")

    def bow_histograms(des_per_image, voc):
        """Return one row per image counting its descriptors per visual word."""
        feats = np.zeros((len(des_per_image), k), "float32")
        for row, des in enumerate(des_per_image):
            words, _ = vq(des, voc)
            feats[row] = np.bincount(words, minlength=k)
        return feats

    accuracies=[]
    for test_fold in range(kfol):
        print("Fold Number : ",test_fold+1)
        train_des=[d for z in range(kfol) if z!=test_fold for d in fold_des[z]]
        train_classes=[c for z in range(kfol) if z!=test_fold for c in fold_classes[z]]
        test_des=fold_des[test_fold]
        test_classes=fold_classes[test_fold]
        if not train_des or not test_des:
            raise ValueError("CV round %d: no ORB descriptors in the training or test images" % (test_fold+1))

        print("Creating clusters and histogram...")
        t0 = time()
        # Vocabulary and scaler are fitted on the training folds only; the held-out
        # fold is only transformed with them. The third kmeans argument (1) asks
        # for a single k-means run.
        voc, _ = kmeans(np.vstack(train_des).astype(float), k, 1)
        stdSlr = StandardScaler()
        im_features = stdSlr.fit_transform(bow_histograms(train_des, voc))
        print("done in %0.3fs" % (time() - t0))

        print("Training the model...")
        t0 = time()
        model.fit(im_features, np.array(train_classes))
        print("Training done in %0.3fs" % (time() - t0))

        test_features = stdSlr.transform(bow_histograms(test_des, voc))
        # Score on the integer labels directly: translating both sides through a
        # name table first cannot change which predictions match.
        accuracies.append(accuracy_score(test_classes, model.predict(test_features)))

    return - 1.0 * np.mean(accuracies)


In [None]:
# Bind the fold data to the objective so that Optuna only has to supply `trial`.
optimization_function_svc=partial(optimize_svc,x=face_data,y=label_data)

In [None]:
# optimize_svc returns the negated mean accuracy, hence a minimising study.
study_svc=optuna.create_study(direction="minimize")

In [None]:
# Run 150 trials; n_jobs=-1 requests parallel trials, so the objective's prints may interleave.
study_svc.optimize(optimization_function_svc,n_trials=150,n_jobs=-1)

In [None]:
# Show the best trial found; its value is the negated mean CV accuracy.
print(study_svc.best_trial)

LinearSVC

In [None]:
def optimize_linsvc(trial,x,y):
    """Optuna objective: cross-validated accuracy of a LinearSVC on ORB bag-of-visual-words features.

    Parameters
    ----------
    trial : optuna trial
        Used to sample ``C``, ``loss``, ``class_weight`` and ``fit_intercept``.
    x : list of list of str
        One list of image file paths per fold.
    y : list of list of int
        One list of integer class labels per fold, aligned with ``x``.

    Returns
    -------
    float
        Negative mean accuracy over all folds, so that a minimising study
        maximises accuracy.

    Raises
    ------
    ValueError
        If a CV round has no training or no test image with ORB descriptors.
    """
    C=trial.suggest_uniform("C",0.001,100)
    loss=trial.suggest_categorical("loss",['hinge', 'squared_hinge'])
    class_weight=trial.suggest_categorical("class_weight",[None, 'balanced'])
    fit_intercept=trial.suggest_categorical("fit_intercept",[True,False])
    model = LinearSVC(C=C,loss=loss,fit_intercept=fit_intercept,class_weight=class_weight)

    kfol=len(x)   # x and y hold one list per fold
    k = 200       # number of visual words requested from k-means
    orb=cv2.ORB_create()

    # ORB descriptors do not depend on the train/test split, so every image is
    # read and described once per trial instead of once per CV round.
    print("Finding descriptors for ",sum(len(fold) for fold in x)," images")
    t0 = time()
    fold_des=[]
    fold_classes=[]
    for fold_paths,fold_labels in zip(x,y):
        des_kept=[]
        classes_kept=[]
        for image_path,image_class in zip(fold_paths,fold_labels):
            im = cv2.imread(image_path)
            # cv2.imread returns None for unreadable files; treat them like
            # images for which ORB yields no descriptors and drop them.
            des = None if im is None else orb.detectAndCompute(im, None)[1]
            if des is not None:
                des_kept.append(des)
                classes_kept.append(image_class)
        fold_des.append(des_kept)
        fold_classes.append(classes_kept)
    print("done in %0.3fs" % (time() - t0))
    print("Found descriptors for ",sum(len(d) for d in fold_des)," images")

    def bow_histograms(des_per_image, voc):
        """Return one row per image counting its descriptors per visual word."""
        feats = np.zeros((len(des_per_image), k), "float32")
        for row, des in enumerate(des_per_image):
            words, _ = vq(des, voc)
            feats[row] = np.bincount(words, minlength=k)
        return feats

    accuracies=[]
    for test_fold in range(kfol):
        print("Fold Number : ",test_fold+1)
        train_des=[d for z in range(kfol) if z!=test_fold for d in fold_des[z]]
        train_classes=[c for z in range(kfol) if z!=test_fold for c in fold_classes[z]]
        test_des=fold_des[test_fold]
        test_classes=fold_classes[test_fold]
        if not train_des or not test_des:
            raise ValueError("CV round %d: no ORB descriptors in the training or test images" % (test_fold+1))

        print("Creating clusters and histogram...")
        t0 = time()
        # Vocabulary and scaler are fitted on the training folds only; the held-out
        # fold is only transformed with them. The third kmeans argument (1) asks
        # for a single k-means run.
        voc, _ = kmeans(np.vstack(train_des).astype(float), k, 1)
        stdSlr = StandardScaler()
        im_features = stdSlr.fit_transform(bow_histograms(train_des, voc))
        print("done in %0.3fs" % (time() - t0))

        print("Training the model...")
        t0 = time()
        model.fit(im_features, np.array(train_classes))
        print("Training done in %0.3fs" % (time() - t0))

        test_features = stdSlr.transform(bow_histograms(test_des, voc))
        # Score on the integer labels directly: translating both sides through a
        # name table first cannot change which predictions match.
        accuracies.append(accuracy_score(test_classes, model.predict(test_features)))

    return - 1.0 * np.mean(accuracies)

In [None]:
# Bind the fold data to the objective so that Optuna only has to supply `trial`.
optimization_function_linsvc=partial(optimize_linsvc,x=face_data,y=label_data)

In [None]:
# optimize_linsvc returns the negated mean accuracy, hence a minimising study.
study_linsvc=optuna.create_study(direction="minimize")

In [None]:
# Run 150 trials; n_jobs=-1 requests parallel trials, so the objective's prints may interleave.
study_linsvc.optimize(optimization_function_linsvc,n_trials=150,n_jobs=-1)

In [None]:
# Show the best trial found; its value is the negated mean CV accuracy.
print(study_linsvc.best_trial)

Random Forest

In [None]:
def optimize_forrest(trial,x,y):
    """Optuna objective: cross-validated accuracy of a random forest on ORB bag-of-visual-words features.

    Parameters
    ----------
    trial : optuna trial
        Used to sample ``max_depth``, ``min_samples_leaf``, ``n_estimators``,
        ``class_weight`` and ``bootstrap``.
    x : list of list of str
        One list of image file paths per fold.
    y : list of list of int
        One list of integer class labels per fold, aligned with ``x``.

    Returns
    -------
    float
        Negative mean accuracy over all folds, so that a minimising study
        maximises accuracy.

    Raises
    ------
    ValueError
        If a CV round has no training or no test image with ORB descriptors.
    """
    list_max_depth=[None,2,3,4,5,6,7,8,9,10]
    max_depth=trial.suggest_categorical("max_depth",list_max_depth)
    min_samples_leaf=trial.suggest_int("min_samples_leaf",1,4)
    n_estimators=trial.suggest_int("n_estimators",10,600)
    class_weight=trial.suggest_categorical("class_weight",[None, 'balanced'])
    bootstrap=trial.suggest_categorical("bootstrap",[False, True])
    model = RandomForestClassifier(random_state=0,
                                    max_depth=max_depth,
                                    bootstrap=bootstrap,
                                    class_weight=class_weight,
                                    min_samples_leaf=min_samples_leaf,
                                    n_estimators=n_estimators)

    kfol=len(x)   # x and y hold one list per fold
    k = 200       # number of visual words requested from k-means
    orb=cv2.ORB_create()

    # ORB descriptors do not depend on the train/test split, so every image is
    # read and described once per trial instead of once per CV round.
    print("Finding descriptors for ",sum(len(fold) for fold in x)," images")
    t0 = time()
    fold_des=[]
    fold_classes=[]
    for fold_paths,fold_labels in zip(x,y):
        des_kept=[]
        classes_kept=[]
        for image_path,image_class in zip(fold_paths,fold_labels):
            im = cv2.imread(image_path)
            # cv2.imread returns None for unreadable files; treat them like
            # images for which ORB yields no descriptors and drop them.
            des = None if im is None else orb.detectAndCompute(im, None)[1]
            if des is not None:
                des_kept.append(des)
                classes_kept.append(image_class)
        fold_des.append(des_kept)
        fold_classes.append(classes_kept)
    print("done in %0.3fs" % (time() - t0))
    print("Found descriptors for ",sum(len(d) for d in fold_des)," images")

    def bow_histograms(des_per_image, voc):
        """Return one row per image counting its descriptors per visual word."""
        feats = np.zeros((len(des_per_image), k), "float32")
        for row, des in enumerate(des_per_image):
            words, _ = vq(des, voc)
            feats[row] = np.bincount(words, minlength=k)
        return feats

    accuracies=[]
    for test_fold in range(kfol):
        print("Fold Number : ",test_fold+1)
        train_des=[d for z in range(kfol) if z!=test_fold for d in fold_des[z]]
        train_classes=[c for z in range(kfol) if z!=test_fold for c in fold_classes[z]]
        test_des=fold_des[test_fold]
        test_classes=fold_classes[test_fold]
        if not train_des or not test_des:
            raise ValueError("CV round %d: no ORB descriptors in the training or test images" % (test_fold+1))

        print("Creating clusters and histogram...")
        t0 = time()
        # Vocabulary and scaler are fitted on the training folds only; the held-out
        # fold is only transformed with them. The third kmeans argument (1) asks
        # for a single k-means run.
        voc, _ = kmeans(np.vstack(train_des).astype(float), k, 1)
        stdSlr = StandardScaler()
        im_features = stdSlr.fit_transform(bow_histograms(train_des, voc))
        print("done in %0.3fs" % (time() - t0))

        print("Training the model...")
        t0 = time()
        model.fit(im_features, np.array(train_classes))
        print("Training done in %0.3fs" % (time() - t0))

        test_features = stdSlr.transform(bow_histograms(test_des, voc))
        # Score on the integer labels directly: translating both sides through a
        # name table first cannot change which predictions match.
        accuracies.append(accuracy_score(test_classes, model.predict(test_features)))

    return - 1.0 * np.mean(accuracies)

In [None]:
# Bind the fold data to the objective so that Optuna only has to supply `trial`.
optimization_function_forrest=partial(optimize_forrest,x=face_data,y=label_data)

In [None]:
# optimize_forrest returns the negated mean accuracy, hence a minimising study.
study_forrest=optuna.create_study(direction="minimize")

In [None]:
# Run 150 trials; n_jobs=-1 requests parallel trials, so the objective's prints may interleave.
study_forrest.optimize(optimization_function_forrest,n_trials=150,n_jobs=-1)

In [None]:
# Show the best trial found; its value is the negated mean CV accuracy.
print(study_forrest.best_trial)

Logistic Regression

In [None]:
def optimize_logreg(trial,x,y):
    """Optuna objective: cross-validated accuracy of logistic regression on ORB bag-of-visual-words features.

    Parameters
    ----------
    trial : optuna trial
        Used to sample ``class_weight``, ``C`` and ``solver``.
    x : list of list of str
        One list of image file paths per fold.
    y : list of list of int
        One list of integer class labels per fold, aligned with ``x``.

    Returns
    -------
    float
        Negative mean accuracy over all folds, so that a minimising study
        maximises accuracy.

    Raises
    ------
    ValueError
        If a CV round has no training or no test image with ORB descriptors.
    """
    class_weight=trial.suggest_categorical("class_weight",[None,"balanced"])
    C=trial.suggest_uniform("C",0.001,100)
    solver=trial.suggest_categorical("solver",['newton-cg', 'lbfgs', 'sag', 'saga'])
    model = LogisticRegression(C=C,class_weight=class_weight,solver=solver)

    kfol=len(x)   # x and y hold one list per fold
    k = 200       # number of visual words requested from k-means
    orb=cv2.ORB_create()

    # ORB descriptors do not depend on the train/test split, so every image is
    # read and described once per trial instead of once per CV round.
    print("Finding descriptors for ",sum(len(fold) for fold in x)," images")
    t0 = time()
    fold_des=[]
    fold_classes=[]
    for fold_paths,fold_labels in zip(x,y):
        des_kept=[]
        classes_kept=[]
        for image_path,image_class in zip(fold_paths,fold_labels):
            im = cv2.imread(image_path)
            # cv2.imread returns None for unreadable files; treat them like
            # images for which ORB yields no descriptors and drop them.
            des = None if im is None else orb.detectAndCompute(im, None)[1]
            if des is not None:
                des_kept.append(des)
                classes_kept.append(image_class)
        fold_des.append(des_kept)
        fold_classes.append(classes_kept)
    print("done in %0.3fs" % (time() - t0))
    print("Found descriptors for ",sum(len(d) for d in fold_des)," images")

    def bow_histograms(des_per_image, voc):
        """Return one row per image counting its descriptors per visual word."""
        feats = np.zeros((len(des_per_image), k), "float32")
        for row, des in enumerate(des_per_image):
            words, _ = vq(des, voc)
            feats[row] = np.bincount(words, minlength=k)
        return feats

    accuracies=[]
    for test_fold in range(kfol):
        print("Fold Number : ",test_fold+1)
        train_des=[d for z in range(kfol) if z!=test_fold for d in fold_des[z]]
        train_classes=[c for z in range(kfol) if z!=test_fold for c in fold_classes[z]]
        test_des=fold_des[test_fold]
        test_classes=fold_classes[test_fold]
        if not train_des or not test_des:
            raise ValueError("CV round %d: no ORB descriptors in the training or test images" % (test_fold+1))

        print("Creating clusters and histogram...")
        t0 = time()
        # Vocabulary and scaler are fitted on the training folds only; the held-out
        # fold is only transformed with them. The third kmeans argument (1) asks
        # for a single k-means run.
        voc, _ = kmeans(np.vstack(train_des).astype(float), k, 1)
        stdSlr = StandardScaler()
        im_features = stdSlr.fit_transform(bow_histograms(train_des, voc))
        print("done in %0.3fs" % (time() - t0))

        print("Training the model...")
        t0 = time()
        model.fit(im_features, np.array(train_classes))
        print("Training done in %0.3fs" % (time() - t0))

        test_features = stdSlr.transform(bow_histograms(test_des, voc))
        # Score on the integer labels directly: translating both sides through a
        # name table first cannot change which predictions match.
        accuracies.append(accuracy_score(test_classes, model.predict(test_features)))

    return - 1.0 * np.mean(accuracies)

In [None]:
# Bind the fold data to the objective so that Optuna only has to supply `trial`.
optimization_function_logreg=partial(optimize_logreg,x=face_data,y=label_data)

In [None]:
# optimize_logreg returns the negated mean accuracy, hence a minimising study.
study_logreg=optuna.create_study(direction="minimize")

In [None]:
# Run 150 trials; n_jobs=-1 requests parallel trials, so the objective's prints may interleave.
study_logreg.optimize(optimization_function_logreg,n_trials=150,n_jobs=-1)

In [None]:
# Show the best trial found; its value is the negated mean CV accuracy.
print(study_logreg.best_trial)

Decision Trees

In [None]:
def optimize_trees(trial,x,y):
    """Optuna objective: cross-validated accuracy of a decision tree on ORB bag-of-visual-words features.

    Parameters
    ----------
    trial : optuna trial
        Used to sample ``criterion``, ``min_impurity_decrease``, ``max_depth``,
        ``min_samples_leaf``, ``max_leaf_nodes`` and ``max_features``.
    x : list of list of str
        One list of image file paths per fold.
    y : list of list of int
        One list of integer class labels per fold, aligned with ``x``.

    Returns
    -------
    float
        Negative mean accuracy over all folds, so that a minimising study
        maximises accuracy.

    Raises
    ------
    ValueError
        If a CV round has no training or no test image with ORB descriptors.
    """
    list_max_depth=[None,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]
    list_max_leaf_nodes=[None,2,3,4,5,6,7]
    list_max_features=[None,1,2,3,4,5,6,7,8,9,10]

    criterion=trial.suggest_categorical("criterion",["gini","entropy"])
    min_impurity_decrease=trial.suggest_uniform("min_impurity_decrease",0.0,0.3)
    max_depth=trial.suggest_categorical("max_depth",list_max_depth)
    min_samples_leaf=trial.suggest_int("min_samples_leaf",1,10)
    max_leaf_nodes=trial.suggest_categorical("max_leaf_nodes",list_max_leaf_nodes)
    max_features=trial.suggest_categorical("max_features",list_max_features)
    model = DecisionTreeClassifier(random_state=0,
                                 min_impurity_decrease=min_impurity_decrease,
                                 max_depth=max_depth,
                                 min_samples_leaf=min_samples_leaf,
                                 max_leaf_nodes=max_leaf_nodes,
                                 max_features=max_features,
                                 criterion=criterion)

    kfol=len(x)   # x and y hold one list per fold
    k = 200       # number of visual words requested from k-means
    orb=cv2.ORB_create()

    # ORB descriptors do not depend on the train/test split, so every image is
    # read and described once per trial instead of once per CV round.
    print("Finding descriptors for ",sum(len(fold) for fold in x)," images")
    t0 = time()
    fold_des=[]
    fold_classes=[]
    for fold_paths,fold_labels in zip(x,y):
        des_kept=[]
        classes_kept=[]
        for image_path,image_class in zip(fold_paths,fold_labels):
            im = cv2.imread(image_path)
            # cv2.imread returns None for unreadable files; treat them like
            # images for which ORB yields no descriptors and drop them.
            des = None if im is None else orb.detectAndCompute(im, None)[1]
            if des is not None:
                des_kept.append(des)
                classes_kept.append(image_class)
        fold_des.append(des_kept)
        fold_classes.append(classes_kept)
    print("done in %0.3fs" % (time() - t0))
    print("Found descriptors for ",sum(len(d) for d in fold_des)," images")

    def bow_histograms(des_per_image, voc):
        """Return one row per image counting its descriptors per visual word."""
        feats = np.zeros((len(des_per_image), k), "float32")
        for row, des in enumerate(des_per_image):
            words, _ = vq(des, voc)
            feats[row] = np.bincount(words, minlength=k)
        return feats

    accuracies=[]
    for test_fold in range(kfol):
        print("Fold Number : ",test_fold+1)
        train_des=[d for z in range(kfol) if z!=test_fold for d in fold_des[z]]
        train_classes=[c for z in range(kfol) if z!=test_fold for c in fold_classes[z]]
        test_des=fold_des[test_fold]
        test_classes=fold_classes[test_fold]
        if not train_des or not test_des:
            raise ValueError("CV round %d: no ORB descriptors in the training or test images" % (test_fold+1))

        print("Creating clusters and histogram...")
        t0 = time()
        # Vocabulary and scaler are fitted on the training folds only; the held-out
        # fold is only transformed with them. The third kmeans argument (1) asks
        # for a single k-means run.
        voc, _ = kmeans(np.vstack(train_des).astype(float), k, 1)
        stdSlr = StandardScaler()
        im_features = stdSlr.fit_transform(bow_histograms(train_des, voc))
        print("done in %0.3fs" % (time() - t0))

        print("Training the model...")
        t0 = time()
        model.fit(im_features, np.array(train_classes))
        print("Training done in %0.3fs" % (time() - t0))

        test_features = stdSlr.transform(bow_histograms(test_des, voc))
        # Score on the integer labels directly: translating both sides through a
        # name table first cannot change which predictions match.
        accuracies.append(accuracy_score(test_classes, model.predict(test_features)))

    return - 1.0 * np.mean(accuracies)

In [None]:
# Bind the fold data to the objective so that Optuna only has to supply `trial`.
optimization_function_trees=partial(optimize_trees,x=face_data,y=label_data)

In [None]:
# optimize_trees returns the negated mean accuracy, hence a minimising study.
study_trees=optuna.create_study(direction="minimize")

In [None]:
# Run 150 trials; n_jobs=-1 requests parallel trials, so the objective's prints may interleave.
study_trees.optimize(optimization_function_trees,n_trials=150,n_jobs=-1)

In [None]:
# Show the best trial found; its value is the negated mean CV accuracy.
print(study_trees.best_trial)

kNN

In [None]:
def optimize_knn(trial,x,y):
    """Optuna objective: negated mean k-fold accuracy of a kNN on ORB bag-of-visual-words features.

    Args:
        trial: Optuna trial used to sample ``n_neighbors``, ``p`` and ``leaf_size``.
        x: Sequence of folds; each fold is a list of image file paths.
        y: Sequence of folds aligned with ``x``; each fold is a list of integer
            class labels (looked up as ``training_names[label - 1]``).

    Returns:
        ``-1.0 * mean(fold accuracies)``, so a study created with
        ``direction="minimize"`` maximises accuracy.
    """
    n_neighbors=trial.suggest_int("n_neighbors",1,15)
    p=trial.suggest_int("p",1,5)
    leaf_size=trial.suggest_int("leaf_size",10,50)
    model = KNeighborsClassifier(n_neighbors=n_neighbors,p=p,leaf_size=leaf_size)

    k = 200                 # visual vocabulary size (number of k-means centroids requested)
    orb = cv2.ORB_create()  # built once per call instead of once per fold

    def describe(paths, classes):
        """Return the ORB descriptors and labels of the images that yielded descriptors."""
        kept_des, kept_classes = [], []
        for image_path, image_class in zip(paths, classes):
            im = cv2.imread(image_path)
            kpts, des = orb.detectAndCompute(im, None)
            if des is not None:  # images without descriptors are dropped
                kept_des.append(des)
                kept_classes.append(image_class)
        return kept_des, kept_classes

    def histograms(des_per_image, voc):
        """Count, per image, how many descriptors are assigned to each visual word."""
        features = np.zeros((len(des_per_image), k), "float32")
        for row, des in enumerate(des_per_image):
            words, _ = vq(des, voc)
            features[row] = np.bincount(words, minlength=k)
        return features

    # Use the folds passed in by the caller rather than notebook globals.
    kfol = len(x)
    accuracies=[]
    for k_f in range(1,kfol+1):
        print("Fold Number : ",k_f)

        # Fold k_f is held out; all remaining folds form the training set.
        test_image_paths=x[k_f-1]
        test_image_classes=y[k_f-1]
        image_paths=[path for z in range(kfol) if z!=(k_f-1) for path in x[z]]
        image_classes=[label for z in range(kfol) if z!=(k_f-1) for label in y[z]]

        #Training
        print("Finding descriptors for ",len(image_paths)," training images")
        t0 = time()
        train_des, image_classes = describe(image_paths, image_classes)
        print("done in %0.3fs" % (time() - t0))
        print("Found descriptors for ",len(train_des)," training images")

        print("Creating clusters and histogram...")
        t0 = time()
        # A single vstack avoids re-copying the growing matrix for every image.
        voc, _ = kmeans(np.vstack(train_des).astype(float), k, 1)
        im_features = histograms(train_des, voc)
        # The scaler is fitted on the training folds only and reused for the test fold.
        stdSlr = StandardScaler().fit(im_features)
        im_features = stdSlr.transform(im_features)
        print("done in %0.3fs" % (time() - t0))

        print("Training the model...")
        t0 = time()
        model.fit(im_features, np.array(image_classes))
        print("Training done in %0.3fs" % (time() - t0))

        #Testing
        print("Finding descriptors for ",len(test_image_paths)," testing images")
        t0 = time()
        test_des, test_image_classes = describe(test_image_paths, test_image_classes)
        print("done in %0.3fs" % (time() - t0))
        print("Found descriptors for ",len(test_des)," testing images")

        print("Calculating histogram, Scaling...")
        t0 = time()
        test_features = stdSlr.transform(histograms(test_des, voc))
        print("done in %0.3fs" % (time() - t0))

        true_class =  [training_names[i-1] for i in test_image_classes]
        predictions =  [training_names[i-1] for i in model.predict(test_features)]
        accuracies.append(accuracy_score(true_class,predictions))

    return - 1.0 * np.mean(accuracies)

In [None]:
# Bind the fold paths/labels as x/y; Optuna then supplies only the trial.
optimization_function_knn = partial(
    optimize_knn,
    x=face_data,
    y=label_data,
)

In [None]:
# optimize_knn returns the negated mean accuracy, hence "minimize".
study_knn = optuna.create_study(
    direction="minimize",
)

In [None]:
# 150 trials; n_jobs=-1 lets Optuna derive the number of parallel workers from the CPU count.
study_knn.optimize(
    optimization_function_knn,
    n_trials=150,
    n_jobs=-1,
)

In [None]:
# Best kNN trial; its value is the negated mean fold accuracy.
print(
    study_knn.best_trial
)

MLP

In [None]:
def optimize_mlp(trial,x,y):
    """Optuna objective: negated mean k-fold accuracy of an MLP on ORB bag-of-visual-words features.

    Args:
        trial: Optuna trial used to sample the number and width of hidden layers,
            ``alpha``, ``learning_rate``, ``activation``, ``solver`` and ``momentum``.
        x: Sequence of folds; each fold is a list of image file paths.
        y: Sequence of folds aligned with ``x``; each fold is a list of integer
            class labels (looked up as ``training_names[label - 1]``).

    Returns:
        ``-1.0 * mean(fold accuracies)``, so a study created with
        ``direction="minimize"`` maximises accuracy.
    """
    n_layers = trial.suggest_int('n_layers', 1,10)
    layers = [trial.suggest_int(f'n_units_{i}', 1, 500) for i in range(n_layers)]

    alpha=trial.suggest_uniform("alpha",0.0001,0.05)
    learning_rate=trial.suggest_categorical("learning_rate",["constant","adaptive"])
    activation=trial.suggest_categorical("activation",["tanh","relu"])
    solver=trial.suggest_categorical("solver",["sgd","adam"])
    # Sampled for every trial to keep the search space fixed; scikit-learn only
    # uses momentum with the "sgd" solver.
    momentum = trial.suggest_uniform('momentum', 0.0, 1.0)

    model = MLPClassifier(solver=solver,activation=activation,hidden_layer_sizes=tuple(layers),learning_rate=learning_rate,alpha=alpha,momentum=momentum, verbose=0, early_stopping=True)

    k = 200                 # visual vocabulary size (number of k-means centroids requested)
    orb = cv2.ORB_create()  # built once per call instead of once per fold

    def describe(paths, classes):
        """Return the ORB descriptors and labels of the images that yielded descriptors."""
        kept_des, kept_classes = [], []
        for image_path, image_class in zip(paths, classes):
            im = cv2.imread(image_path)
            kpts, des = orb.detectAndCompute(im, None)
            if des is not None:  # images without descriptors are dropped
                kept_des.append(des)
                kept_classes.append(image_class)
        return kept_des, kept_classes

    def histograms(des_per_image, voc):
        """Count, per image, how many descriptors are assigned to each visual word."""
        features = np.zeros((len(des_per_image), k), "float32")
        for row, des in enumerate(des_per_image):
            words, _ = vq(des, voc)
            features[row] = np.bincount(words, minlength=k)
        return features

    # Use the folds passed in by the caller rather than notebook globals.
    kfol = len(x)
    accuracies=[]
    for k_f in range(1,kfol+1):
        print("Fold Number : ",k_f)

        # Fold k_f is held out; all remaining folds form the training set.
        test_image_paths=x[k_f-1]
        test_image_classes=y[k_f-1]
        image_paths=[path for z in range(kfol) if z!=(k_f-1) for path in x[z]]
        image_classes=[label for z in range(kfol) if z!=(k_f-1) for label in y[z]]

        #Training
        print("Finding descriptors for ",len(image_paths)," training images")
        t0 = time()
        train_des, image_classes = describe(image_paths, image_classes)
        print("done in %0.3fs" % (time() - t0))
        print("Found descriptors for ",len(train_des)," training images")

        print("Creating clusters and histogram...")
        t0 = time()
        # A single vstack avoids re-copying the growing matrix for every image.
        voc, _ = kmeans(np.vstack(train_des).astype(float), k, 1)
        im_features = histograms(train_des, voc)
        # The scaler is fitted on the training folds only and reused for the test fold.
        stdSlr = StandardScaler().fit(im_features)
        im_features = stdSlr.transform(im_features)
        print("done in %0.3fs" % (time() - t0))

        print("Training the model...")
        t0 = time()
        model.fit(im_features, np.array(image_classes))
        print("Training done in %0.3fs" % (time() - t0))

        #Testing
        print("Finding descriptors for ",len(test_image_paths)," testing images")
        t0 = time()
        test_des, test_image_classes = describe(test_image_paths, test_image_classes)
        print("done in %0.3fs" % (time() - t0))
        print("Found descriptors for ",len(test_des)," testing images")

        print("Calculating histogram, Scaling...")
        t0 = time()
        test_features = stdSlr.transform(histograms(test_des, voc))
        print("done in %0.3fs" % (time() - t0))

        true_class =  [training_names[i-1] for i in test_image_classes]
        predictions =  [training_names[i-1] for i in model.predict(test_features)]
        accuracies.append(accuracy_score(true_class,predictions))

    return - 1.0 * np.mean(accuracies)

In [None]:
# Bind the fold paths/labels as x/y; Optuna then supplies only the trial.
optimization_function_mlp = partial(
    optimize_mlp,
    x=face_data,
    y=label_data,
)

In [None]:
# optimize_mlp returns the negated mean accuracy, hence "minimize".
study_mlp = optuna.create_study(
    direction="minimize",
)

In [None]:
# 150 trials; no n_jobs is passed here, unlike the other studies in this notebook.
study_mlp.optimize(
    optimization_function_mlp,
    n_trials=150,
)

In [None]:
# Best MLP trial; its value is the negated mean fold accuracy.
print(
    study_mlp.best_trial
)

AdaBoost Classifier

In [None]:
def optimize_ada(trial,x,y):
    """Optuna objective: negated mean k-fold accuracy of AdaBoost on ORB bag-of-visual-words features.

    Args:
        trial: Optuna trial used to sample ``n_estimators`` and ``learning_rate``.
        x: Sequence of folds; each fold is a list of image file paths.
        y: Sequence of folds aligned with ``x``; each fold is a list of integer
            class labels (looked up as ``training_names[label - 1]``).

    Returns:
        ``-1.0 * mean(fold accuracies)``, so a study created with
        ``direction="minimize"`` maximises accuracy.
    """
    n_estimators=trial.suggest_int("n_estimators",50,300)
    learning_rate=trial.suggest_uniform("learning_rate",0.001,1.0)
    model = AdaBoostClassifier(random_state=0,
                                 n_estimators=n_estimators,
                                 learning_rate=learning_rate)

    k = 200                 # visual vocabulary size (number of k-means centroids requested)
    orb = cv2.ORB_create()  # built once per call instead of once per fold

    def describe(paths, classes):
        """Return the ORB descriptors and labels of the images that yielded descriptors."""
        kept_des, kept_classes = [], []
        for image_path, image_class in zip(paths, classes):
            im = cv2.imread(image_path)
            kpts, des = orb.detectAndCompute(im, None)
            if des is not None:  # images without descriptors are dropped
                kept_des.append(des)
                kept_classes.append(image_class)
        return kept_des, kept_classes

    def histograms(des_per_image, voc):
        """Count, per image, how many descriptors are assigned to each visual word."""
        features = np.zeros((len(des_per_image), k), "float32")
        for row, des in enumerate(des_per_image):
            words, _ = vq(des, voc)
            features[row] = np.bincount(words, minlength=k)
        return features

    # Use the folds passed in by the caller rather than notebook globals.
    kfol = len(x)
    accuracies=[]
    for k_f in range(1,kfol+1):
        print("Fold Number : ",k_f)

        # Fold k_f is held out; all remaining folds form the training set.
        test_image_paths=x[k_f-1]
        test_image_classes=y[k_f-1]
        image_paths=[path for z in range(kfol) if z!=(k_f-1) for path in x[z]]
        image_classes=[label for z in range(kfol) if z!=(k_f-1) for label in y[z]]

        #Training
        print("Finding descriptors for ",len(image_paths)," training images")
        t0 = time()
        train_des, image_classes = describe(image_paths, image_classes)
        print("done in %0.3fs" % (time() - t0))
        print("Found descriptors for ",len(train_des)," training images")

        print("Creating clusters and histogram...")
        t0 = time()
        # A single vstack avoids re-copying the growing matrix for every image.
        voc, _ = kmeans(np.vstack(train_des).astype(float), k, 1)
        im_features = histograms(train_des, voc)
        # The scaler is fitted on the training folds only and reused for the test fold.
        stdSlr = StandardScaler().fit(im_features)
        im_features = stdSlr.transform(im_features)
        print("done in %0.3fs" % (time() - t0))

        print("Training the model...")
        t0 = time()
        model.fit(im_features, np.array(image_classes))
        print("Training done in %0.3fs" % (time() - t0))

        #Testing
        print("Finding descriptors for ",len(test_image_paths)," testing images")
        t0 = time()
        test_des, test_image_classes = describe(test_image_paths, test_image_classes)
        print("done in %0.3fs" % (time() - t0))
        print("Found descriptors for ",len(test_des)," testing images")

        print("Calculating histogram, Scaling...")
        t0 = time()
        test_features = stdSlr.transform(histograms(test_des, voc))
        print("done in %0.3fs" % (time() - t0))

        true_class =  [training_names[i-1] for i in test_image_classes]
        predictions =  [training_names[i-1] for i in model.predict(test_features)]
        accuracies.append(accuracy_score(true_class,predictions))

    return - 1.0 * np.mean(accuracies)


In [None]:
# Bind the fold paths/labels as x/y; Optuna then supplies only the trial.
optimization_function_ada = partial(
    optimize_ada,
    x=face_data,
    y=label_data,
)

In [None]:
# optimize_ada returns the negated mean accuracy, hence "minimize".
study_ada = optuna.create_study(
    direction="minimize",
)

In [None]:
# 150 trials; n_jobs=-1 lets Optuna derive the number of parallel workers from the CPU count.
study_ada.optimize(
    optimization_function_ada,
    n_trials=150,
    n_jobs=-1,
)

In [None]:
# Best AdaBoost trial; its value is the negated mean fold accuracy.
print(
    study_ada.best_trial
)

GaussianNB (no need for tuning)

In [None]:
# Gaussian naive Bayes is fitted directly; no hyperparameter search is run for it.
# NOTE(review): X_train / y_train are not defined in the cells visible here -
# confirm they exist when the notebook is run on a fresh kernel.
model = GaussianNB()
model.fit(
    X_train,
    y_train,
)

Gaussian Process Classifier

In [None]:
def optimize_gauss(trial,x,y):
    """Optuna objective: negated mean k-fold accuracy of a Gaussian process classifier on ORB bag-of-visual-words features.

    Only the ``length_scale`` of a ``1.0 * RBF`` kernel is searched. The other
    kernels imported by the notebook (DotProduct, Matern, RationalQuadratic,
    WhiteKernel) and the ``warm_start`` / ``n_restarts_optimizer`` /
    ``copy_X_train`` options are not part of the search space.

    Args:
        trial: Optuna trial used to sample ``length_scale``.
        x: Sequence of folds; each fold is a list of image file paths.
        y: Sequence of folds aligned with ``x``; each fold is a list of integer
            class labels (looked up as ``training_names[label - 1]``).

    Returns:
        ``-1.0 * mean(fold accuracies)``, so a study created with
        ``direction="minimize"`` maximises accuracy.
    """
    length_scale=trial.suggest_uniform("length_scale",0.2,3.0)
    model = GaussianProcessClassifier(kernel=1.0*RBF(length_scale=length_scale),
                                random_state=0)

    k = 200                 # visual vocabulary size (number of k-means centroids requested)
    orb = cv2.ORB_create()  # built once per call instead of once per fold

    def describe(paths, classes):
        """Return the ORB descriptors and labels of the images that yielded descriptors."""
        kept_des, kept_classes = [], []
        for image_path, image_class in zip(paths, classes):
            im = cv2.imread(image_path)
            kpts, des = orb.detectAndCompute(im, None)
            if des is not None:  # images without descriptors are dropped
                kept_des.append(des)
                kept_classes.append(image_class)
        return kept_des, kept_classes

    def histograms(des_per_image, voc):
        """Count, per image, how many descriptors are assigned to each visual word."""
        features = np.zeros((len(des_per_image), k), "float32")
        for row, des in enumerate(des_per_image):
            words, _ = vq(des, voc)
            features[row] = np.bincount(words, minlength=k)
        return features

    # Use the folds passed in by the caller rather than notebook globals.
    kfol = len(x)
    accuracies=[]
    for k_f in range(1,kfol+1):
        print("Fold Number : ",k_f)

        # Fold k_f is held out; all remaining folds form the training set.
        test_image_paths=x[k_f-1]
        test_image_classes=y[k_f-1]
        image_paths=[path for z in range(kfol) if z!=(k_f-1) for path in x[z]]
        image_classes=[label for z in range(kfol) if z!=(k_f-1) for label in y[z]]

        #Training
        print("Finding descriptors for ",len(image_paths)," training images")
        t0 = time()
        train_des, image_classes = describe(image_paths, image_classes)
        print("done in %0.3fs" % (time() - t0))
        print("Found descriptors for ",len(train_des)," training images")

        print("Creating clusters and histogram...")
        t0 = time()
        # A single vstack avoids re-copying the growing matrix for every image.
        voc, _ = kmeans(np.vstack(train_des).astype(float), k, 1)
        im_features = histograms(train_des, voc)
        # The scaler is fitted on the training folds only and reused for the test fold.
        stdSlr = StandardScaler().fit(im_features)
        im_features = stdSlr.transform(im_features)
        print("done in %0.3fs" % (time() - t0))

        print("Training the model...")
        t0 = time()
        model.fit(im_features, np.array(image_classes))
        print("Training done in %0.3fs" % (time() - t0))

        #Testing
        print("Finding descriptors for ",len(test_image_paths)," testing images")
        t0 = time()
        test_des, test_image_classes = describe(test_image_paths, test_image_classes)
        print("done in %0.3fs" % (time() - t0))
        print("Found descriptors for ",len(test_des)," testing images")

        print("Calculating histogram, Scaling...")
        t0 = time()
        test_features = stdSlr.transform(histograms(test_des, voc))
        print("done in %0.3fs" % (time() - t0))

        true_class =  [training_names[i-1] for i in test_image_classes]
        predictions =  [training_names[i-1] for i in model.predict(test_features)]
        accuracies.append(accuracy_score(true_class,predictions))

    return - 1.0 * np.mean(accuracies)

In [None]:
# Bind the fold paths/labels as x/y; Optuna then supplies only the trial.
optimization_function_gauss = partial(
    optimize_gauss,
    x=face_data,
    y=label_data,
)

In [None]:
# optimize_gauss returns the negated mean accuracy, hence "minimize".
study_gauss = optuna.create_study(
    direction="minimize",
)

In [None]:
# 150 trials; n_jobs=-1 lets Optuna derive the number of parallel workers from the CPU count.
study_gauss.optimize(
    optimization_function_gauss,
    n_trials=150,
    n_jobs=-1,
)

In [None]:
# Best Gaussian-process trial; its value is the negated mean fold accuracy.
print(
    study_gauss.best_trial
)

Ridge Classifier

In [None]:
def optimize_ridge(trial,x,y):
    """Optuna objective: negated mean k-fold accuracy of a ridge classifier on ORB bag-of-visual-words features.

    Args:
        trial: Optuna trial used to sample ``alpha`` (from 0.1 to 1.0 in steps of
            0.1), ``solver`` and ``class_weight``.
        x: Sequence of folds; each fold is a list of image file paths.
        y: Sequence of folds aligned with ``x``; each fold is a list of integer
            class labels (looked up as ``training_names[label - 1]``).

    Returns:
        ``-1.0 * mean(fold accuracies)``, so a study created with
        ``direction="minimize"`` maximises accuracy.
    """
    alpha_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    alpha=trial.suggest_categorical("alpha",alpha_list)
    solver=trial.suggest_categorical("solver",['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'])
    class_weight=trial.suggest_categorical("class_weight",[None, 'balanced'])

    model = RidgeClassifier(alpha=alpha,solver=solver,class_weight=class_weight)

    k = 200                 # visual vocabulary size (number of k-means centroids requested)
    orb = cv2.ORB_create()  # built once per call instead of once per fold

    def describe(paths, classes):
        """Return the ORB descriptors and labels of the images that yielded descriptors."""
        kept_des, kept_classes = [], []
        for image_path, image_class in zip(paths, classes):
            im = cv2.imread(image_path)
            kpts, des = orb.detectAndCompute(im, None)
            if des is not None:  # images without descriptors are dropped
                kept_des.append(des)
                kept_classes.append(image_class)
        return kept_des, kept_classes

    def histograms(des_per_image, voc):
        """Count, per image, how many descriptors are assigned to each visual word."""
        features = np.zeros((len(des_per_image), k), "float32")
        for row, des in enumerate(des_per_image):
            words, _ = vq(des, voc)
            features[row] = np.bincount(words, minlength=k)
        return features

    # Use the folds passed in by the caller rather than notebook globals.
    kfol = len(x)
    accuracies=[]
    for k_f in range(1,kfol+1):
        print("Fold Number : ",k_f)

        # Fold k_f is held out; all remaining folds form the training set.
        test_image_paths=x[k_f-1]
        test_image_classes=y[k_f-1]
        image_paths=[path for z in range(kfol) if z!=(k_f-1) for path in x[z]]
        image_classes=[label for z in range(kfol) if z!=(k_f-1) for label in y[z]]

        #Training
        print("Finding descriptors for ",len(image_paths)," training images")
        t0 = time()
        train_des, image_classes = describe(image_paths, image_classes)
        print("done in %0.3fs" % (time() - t0))
        print("Found descriptors for ",len(train_des)," training images")

        print("Creating clusters and histogram...")
        t0 = time()
        # A single vstack avoids re-copying the growing matrix for every image.
        voc, _ = kmeans(np.vstack(train_des).astype(float), k, 1)
        im_features = histograms(train_des, voc)
        # The scaler is fitted on the training folds only and reused for the test fold.
        stdSlr = StandardScaler().fit(im_features)
        im_features = stdSlr.transform(im_features)
        print("done in %0.3fs" % (time() - t0))

        print("Training the model...")
        t0 = time()
        model.fit(im_features, np.array(image_classes))
        print("Training done in %0.3fs" % (time() - t0))

        #Testing
        print("Finding descriptors for ",len(test_image_paths)," testing images")
        t0 = time()
        test_des, test_image_classes = describe(test_image_paths, test_image_classes)
        print("done in %0.3fs" % (time() - t0))
        print("Found descriptors for ",len(test_des)," testing images")

        print("Calculating histogram, Scaling...")
        t0 = time()
        test_features = stdSlr.transform(histograms(test_des, voc))
        print("done in %0.3fs" % (time() - t0))

        true_class =  [training_names[i-1] for i in test_image_classes]
        predictions =  [training_names[i-1] for i in model.predict(test_features)]
        accuracies.append(accuracy_score(true_class,predictions))

    return - 1.0 * np.mean(accuracies)

In [None]:
# Bind the fold paths/labels as x/y; Optuna then supplies only the trial.
optimization_function_ridge = partial(
    optimize_ridge,
    x=face_data,
    y=label_data,
)

In [None]:
# optimize_ridge returns the negated mean accuracy, hence "minimize".
study_ridge = optuna.create_study(
    direction="minimize",
)

In [None]:
# 150 trials, with n_jobs=2 passed to Optuna (the other studies use -1 or omit it).
study_ridge.optimize(
    optimization_function_ridge,
    n_trials=150,
    n_jobs=2,
)

In [None]:
# Best ridge trial; its value is the negated mean fold accuracy.
print(
    study_ridge.best_trial
)

In [None]:
# Print value, parameters and duration of the first 100 ridge trials.
# Slicing (rather than indexing up to a fixed 100) avoids an IndexError when the
# study holds fewer trials, and `study_ridge.trials` is read once instead of
# three times per row (Optuna returns a fresh copy of the list on each access).
for i, ridge_trial in enumerate(study_ridge.trials[:100]):
    print(i,": ",ridge_trial.value,", params:", ridge_trial.params,", duration:",ridge_trial.duration)