In [26]:
# Common
from utils import *
# Model
from tensorflow.keras.models import load_model
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV 
import pickle
import pandas as pd

from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Setting up

In [2]:
# Seed NumPy's global random number generator
np.random.seed(42)

# Image dimensions: width, height and number of channels
IMG_W = 160
IMG_H = 160
IMG_C = 3

In [3]:
# Location of the FaceNet Keras model file (.h5)
facenet_path = 'D:/Face recognition/model/facenet-keras/facenet_keras.h5'
# Load the network; the embedding cells pass it to image_to_embedding
model = load_model(facenet_path)



# Data Preparation

In [4]:
# Root folder of the dataset.
# NOTE(review): absolute, machine-specific path — consider a configurable base directory.
root_path = 'D:/Face recognition/VN-celeb/'

# Collect all the person names: every entry directly under the root is counted
# as one individual (os.listdir does not filter out stray files).
dir_names = os.listdir(root_path)
person_names = list(dir_names)
n_individuals = len(person_names)

# "\n" (not "/n") so the message is followed by a blank line instead of a literal "/n"
print(f"Total number of individuals: {n_individuals}\n")
# print(f"Name of the individuals : \n\t{person_names}")

Total number of individuals: 1020/n


In [5]:
# Split the image paths found under root_path into a train set and a test set
trainpaths, testpaths = get_train_test_split(root_path)
# Number of paths in each split, shown as (train, test)
tuple(map(len, (trainpaths, testpaths)))

Train test split: 100%|██████████| 1020/1020 [00:00<00:00, 5292.48it/s]


(18902, 4203)

In [8]:
# Creating labels for train and test data.
# The pattern has one capture group: the run of digits between "VN-celeb/" and
# the next "/" in a path (presumably generating_labels uses it as the label —
# confirm in utils).
# It is a raw string so that "\d" reaches the regex engine untouched; in a plain
# string literal "\d" is an invalid escape sequence that newer Python versions
# warn about.
label_pattern = r'VN-celeb/(\d+)/'

print('Creating test data labels:')
test_labels = generating_labels(testpaths, label_pattern)
print('Creating train data labels:')
train_labels = generating_labels(trainpaths, label_pattern)

Creating test data labels:


100%|██████████| 4203/4203 [00:00<00:00, 333673.90it/s]


Creating train data labels:


100%|██████████| 18902/18902 [00:00<00:00, 254949.03it/s]


In [9]:
# Create embeddings for train data:
# one row per training image, 128 values per row.
embeddings = np.empty(shape=(len(trainpaths), 128))
print(embeddings.shape)

# Open the null device once for the whole run rather than once per image.
with open(os.devnull, 'w') as devnull:
    # Loop over images
    for i, path in enumerate(tqdm(trainpaths)):
        # Discard anything the helpers write to stdout while they run
        with redirect_stdout(devnull):
            # Load the image
            image = load_image(path)

            # Generate the embedding
            embedding = image_to_embedding(image, model)

        # Store the embedding
        embeddings[i] = embedding

(18902, 128)


100%|██████████| 18902/18902 [4:05:06<00:00,  1.29it/s]  


In [11]:
# Create embeddings for test data:
# one row per test image, 128 values per row.
embeddings_test = np.empty(shape=(len(testpaths), 128))

# Open the null device once for the whole run rather than once per image.
with open(os.devnull, 'w') as devnull:
    # Loop over images
    for i, path in enumerate(tqdm(testpaths)):
        # Discard anything the helpers write to stdout while they run
        with redirect_stdout(devnull):
            # Load the image
            image = load_image(path)

            # Generate the embedding
            embedding = image_to_embedding(image, model)

        # Store the embedding
        embeddings_test[i] = embedding

100%|██████████| 4203/4203 [1:37:06<00:00,  1.39s/it]


In [24]:
# Persist the embeddings and labels so the modeling section can start from disk.
# NOTE(review): the data-loading cell reads from "../Task3/Data/", while these
# files are written directly into "Task3/" — confirm the files are moved, or
# align the two locations.
pickle_targets = [
    ('D:/Face recognition/Task3/X_train.pkl', embeddings),
    ('D:/Face recognition/Task3/X_test.pkl', embeddings_test),
    ('D:/Face recognition/Task3/y_train.pkl', train_labels),
    ('D:/Face recognition/Task3/y_test.pkl', test_labels),
]

# Written in the same order as listed above
for target_path, payload in pickle_targets:
    with open(target_path, 'wb') as f:
        pickle.dump(payload, f)

# Modeling

In [2]:
from sklearn.metrics import accuracy_score, classification_report

# Data loading

In [13]:
def _unpickle(path):
    """Return the object stored in the pickle file at `path`."""
    # pickle.load can execute arbitrary code; only use it on files you produced yourself
    with open(path, "rb") as fh:   # Unpickling
        return pickle.load(fh)


# NOTE(review): the export cell writes its .pkl files to 'D:/Face recognition/Task3/',
# not to a "Data" sub-folder — confirm these relative paths point at the same files.
X_train = _unpickle("../Task3/Data/X_train.pkl")
X_test = _unpickle("../Task3/Data/X_test.pkl")
y_train = _unpickle("../Task3/Data/y_train.pkl")
y_test = _unpickle("../Task3/Data/y_test.pkl")


In [22]:
# Stack the train and test splits back into one dataset (train rows first);
# the cross-validation cell does its own splitting of X and y.
X = np.concatenate([X_train, X_test], axis=0)
y = np.concatenate([y_train, y_test], axis=0)
# Shapes of the merged features and labels
X.shape, y.shape

((23105, 128), (23105,))

# Machine learning models

In [27]:
# Candidate classifiers, each paired with the short name used when reporting scores.
# The decision tree and the random forest are randomised estimators; pinning
# random_state makes their scores repeatable from run to run. Every other
# setting is left at the library default.
models = [
    ('LDA', LinearDiscriminantAnalysis()),
    ("LR", LogisticRegression()),
    ("NB", GaussianNB()),
    ("KNN", KNeighborsClassifier()),
    ("DT", DecisionTreeClassifier(random_state=42)),
    ("SVM", SVC()),
    ("RF", RandomForestClassifier(random_state=42)),
]


In [28]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

# One shuffled 5-fold splitter with a fixed seed, shared by every classifier so
# they are all scored on identical folds. It does not depend on the classifier,
# so it is built once, outside the loop.
kfold = KFold(n_splits=5, shuffle=True, random_state=0)

# The loop variable is deliberately not named `model`: that name holds the
# FaceNet network loaded earlier, and rebinding it here would break any later
# re-run of the embedding cells.
for name, clf in models:
    cv_scores = cross_val_score(clf, X, y, cv=kfold)
    # Report the mean score over the 5 folds
    print("{} mean cross validations score:{:.5f}".format(name, cv_scores.mean()))

LDA mean cross validations score:0.84077
LR mean cross validations score:0.66345
NB mean cross validations score:0.77252
KNN mean cross validations score:0.78329
DT mean cross validations score:0.26345
SVM mean cross validations score:0.81173
RF mean cross validations score:0.72352
