### 1. Setting paths for datasets

In [1]:
import os 
import numpy as np

# Anchor both dataset locations to the directory the notebook is run from.
cwd = os.getcwd()

train_path, validation_path = (
    os.path.join(cwd, subdir)
    for subdir in (
        'dataset/combined_5celebs_family/train',
        'dataset/combined_5celebs_family/val/',
    )
)

### 2. Loading the training data

In [None]:
from face_recog.io import ImageDataset

# Build a dataset reader rooted at the training directory and load it.
# NOTE(review): `convert_xy=True` presumably makes load_data return parallel
# (images, labels) sequences -- confirm against face_recog.io.ImageDataset.
data_train = ImageDataset(train_path)
X_train, y_train = data_train.load_data(convert_xy=True)

In [None]:
# Sanity-check the load: report how many images and labels came back ...
print('Number of images:', len(X_train))
print('Number of labels:', len(y_train))

# ... then peek at the first sample only (nothing is printed for an empty set).
if len(X_train):
    print('Label:', y_train[0], 'Shape:', X_train[0].shape)

### 3. Detecting and Cropping faces 

In [None]:
from face_recog.detector import FaceDetectorTransformer

# Detect faces in the training images and keep the crops in X_train.
# NOTE(review): presumably each crop is resized to `final_size` -- confirm
# against face_recog.detector.FaceDetectorTransformer.
detector = FaceDetectorTransformer(final_size=(160, 160))
# X_train is overwritten with the detector output, so re-running this cell
# would feed that output back into the detector; reload the data first.
X_train = detector.fit_transform(X_train)
# Must run after fit_transform: the labels are filtered through the detector
# so they stay aligned with the images it returned.
# NOTE(review): presumably drops labels of images with no detected face -- confirm.
y_train = detector.apply_label_mask(y_train)

print('Shape of detected face datasets:', X_train.shape)
print('Length of label list', len(y_train))

In [None]:
# Walk the training directory (folder by folder, file by file) with a running
# counter and print the path of every file whose position is listed in `remove`.
# NOTE(review): os.listdir order is not guaranteed -- confirm it matches the
# order in which ImageDataset loaded the images, and that these positions are
# meant to index the raw files rather than the post-detection X_train.
remove = [2, 39, 60, 101]

count = 0
image_root = data_train.image_path
for folder in os.listdir(image_root):
    # NOTE(review): this resets the dataset's per-folder detected-image entry
    # as a side effect of a listing loop -- confirm that it is intentional.
    data_train.img_array_detected[folder] = []
    folder_path = os.path.join(image_root, folder)
    for file_name in os.listdir(folder_path):
        if count in remove:
            print(os.path.join(folder_path, file_name))
        count += 1

#### Displaying cropped images

In [None]:
import matplotlib.pyplot as plt

# Lay the cropped faces out on a grid so wrongly detected crops can be spotted
# by eye; each title shows the sample's index in X_train next to its label.
columns = 5
n_images = len(X_train)
# Ceiling division gives exactly enough rows (no spare empty row when the
# count is a multiple of `columns`); at least one row keeps the figure height
# positive even when no faces were detected.
n_rows = max(1, -(-n_images // columns))

fig = plt.figure(figsize=(20, n_rows * 2), dpi=80)

for i, v in enumerate(X_train):
    ax = fig.add_subplot(n_rows, columns, i + 1)
    # f-string instead of `+` so a non-string label does not raise TypeError.
    ax.set_title(f'{i}. {y_train[i]}')
    ax.imshow(v)

fig.tight_layout()
# plt.show() instead of fig.show(): on non-GUI backends such as the notebook
# inline backend, fig.show() only emits a warning and renders nothing.
plt.show()

#### Removing images that were wrongly detected


In [None]:
# indices_to_remove = [181, 176, 155, 110, 59, 46]

# X, y = [], []
# for i in range(len(X_train)):
#     if i not in indices_to_remove:
#         X.append(X_train[i])
#         y.append(y_train[i])
        
# X_train = np.array(X)
# y_train = y

# print('Shape of detected face datasets:', X_train.shape)
# print('Length of label list', len(y_train))

### 4. Creating embeddings of the face

In [None]:
from face_recog.recognizer import FacenetEmbeddingsTransformer

# Turn each cropped face into an embedding using the Keras FaceNet model.
# The model path is relative, so the notebook must be run from the project root.
recognizer = FacenetEmbeddingsTransformer(model_path='models/keras/facenet_keras.h5')
# X_train is overwritten: from here on it holds embeddings, not images.
X_train = recognizer.fit_transform(X_train)
print(X_train.shape)

### 5. Creating a classifier

##### Normalizing embeddings

In [None]:
from sklearn.preprocessing import Normalizer
# Scale every embedding (row) to unit L2 norm; the same `norm` object is
# reused later on the validation embeddings.
norm = Normalizer(norm='l2')
X_train = norm.fit_transform(X_train)

##### Training a classifier

#### Classifier #1: Gradient Boosting Classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Fit a gradient-boosted tree ensemble on the normalised embeddings.
# random_state is pinned so that Restart & Run All reproduces the same model
# (and therefore the same validation accuracy); the value itself is arbitrary.
clf = GradientBoostingClassifier(random_state=42)
clf.fit(X_train, y_train)

#### Classifier #2: XGBoost

In [None]:
from xgboost import XGBClassifier

# Fit a second classifier (XGBoost, default hyper-parameters) on the same
# embeddings so it can be compared with the gradient-boosting model.
# NOTE(review): recent xgboost releases expect class labels encoded as
# integers 0..n_classes-1; if y_train holds name strings, fit() may raise --
# confirm against the installed xgboost version.
clf2 = XGBClassifier()
clf2.fit(X_train, y_train)

### 6. Validation

#### Loading data

In [None]:
# Load the validation split through the same reader class used for training.
data_val = ImageDataset(validation_path)
X_val, y_val = data_val.load_data(convert_xy=True)

# Sanity-check the load: report how many images and labels came back ...
print('Number of images:', len(X_val))
print('Number of labels:', len(y_val))

# ... then peek at the first sample only (nothing is printed for an empty set).
if len(X_val):
    print('Label:', y_val[0], 'Shape:', X_val[0].shape)

#### Detecting faces and generating embeddings

In [None]:
# Number of validation labels before face detection filters them.
len(y_val)

In [None]:
# Run the already-fitted detector on the validation images. The crops go to
# X_val_ (trailing underscore) and are kept for the result plots further down.
X_val_ = detector.transform(X_val)
# Filter the labels through the detector so they line up with X_val_.
# NOTE(review): y_val is overwritten in place, so re-running this cell would
# apply the mask to already-filtered labels -- reload the validation data first.
y_val = detector.apply_label_mask(y_val)

print('Shape of detected face datasets:', X_val_.shape)
print('Length of label list', len(y_val))

In [None]:
# Length of the detector's label mask after the validation transform.
# NOTE(review): presumably one entry per input image -- confirm.
len(detector.label_mask)

In [None]:
# Embed the cropped validation faces with the recognizer used for training.
# X_val is overwritten: it now holds embeddings, while the crops stay in X_val_.
X_val = recognizer.transform(X_val_)
print(X_val.shape)

#### Predicting

In [None]:
# Apply the same L2 normalisation as for training, then predict a label for
# every validation embedding with the gradient-boosting classifier.
X_val = norm.transform(X_val)
y_pred = clf.predict(X_val)

#####  Accuracy

In [None]:
from sklearn.metrics import accuracy_score
# Share of validation faces whose predicted label equals the true label, in percent.
gb_accuracy_pct = accuracy_score(y_val, y_pred) * 100
print('Accuracy:', gb_accuracy_pct, '%')

### 7. Plotting Results

In [None]:
import matplotlib.pyplot as plt

# Show every cropped validation face titled "predicted//actual"; titles of
# misclassified faces are drawn in red so errors stand out.
columns = 5
n_images = len(X_val_)
# Ceiling division gives exactly enough rows (no spare empty row when the
# count is a multiple of `columns`); at least one row keeps the figure height
# positive even when there are no images.
n_rows = max(1, -(-n_images // columns))

fig = plt.figure(figsize=(20, n_rows * 2), dpi=80)

for i, v in enumerate(X_val_):
    ax = fig.add_subplot(n_rows, columns, i + 1)
    # f-string instead of `+` so non-string labels do not raise TypeError.
    title = f'{y_pred[i]}//{y_val[i]}'
    if y_pred[i] == y_val[i]:
        ax.set_title(title)
    else:
        ax.set_title(title, color='red')
    ax.imshow(v)

fig.tight_layout()
# plt.show() instead of fig.show(): on non-GUI backends such as the notebook
# inline backend, fig.show() only emits a warning and renders nothing.
plt.show()

### 8. Testing Results with XGBoost Classifier

#### Predicting

In [None]:
# Predict with the XGBoost classifier on the already-normalised validation
# embeddings. This overwrites y_pred, so the gradient-boosting predictions
# are no longer available after this cell.
y_pred = clf2.predict(X_val)

#####  Accuracy

In [None]:
from sklearn.metrics import accuracy_score
# Share of validation faces whose predicted label equals the true label, in percent.
xgb_accuracy_pct = accuracy_score(y_val, y_pred) * 100
print('Accuracy:', xgb_accuracy_pct, '%')

### 9. Plotting Results (XGBoost)

In [None]:
import matplotlib.pyplot as plt

# Show every cropped validation face titled "predicted//actual", using the
# current contents of y_pred; titles of misclassified faces are drawn in red.
columns = 5
n_images = len(X_val_)
# Ceiling division gives exactly enough rows (no spare empty row when the
# count is a multiple of `columns`); at least one row keeps the figure height
# positive even when there are no images.
n_rows = max(1, -(-n_images // columns))

fig = plt.figure(figsize=(20, n_rows * 2), dpi=80)

for i, v in enumerate(X_val_):
    ax = fig.add_subplot(n_rows, columns, i + 1)
    # f-string instead of `+` so non-string labels do not raise TypeError.
    title = f'{y_pred[i]}//{y_val[i]}'
    if y_pred[i] == y_val[i]:
        ax.set_title(title)
    else:
        ax.set_title(title, color='red')
    ax.imshow(v)

fig.tight_layout()
# plt.show() instead of fig.show(): on non-GUI backends such as the notebook
# inline backend, fig.show() only emits a warning and renders nothing.
plt.show()