# 1.Facial_recognition_model


This script builds a facial recognition model using pre-extracted facial features from images.
It trains and compares three machine learning classifiers: Random Forest (with hyperparameter tuning),
Logistic Regression, and XGBoost. The script includes data cleaning, feature selection, and model evaluation
using accuracy, F1 score, and log loss. Results help identify which model performs best at recognizing individuals
based on their facial feature vectors.

Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, log_loss, make_scorer
import xgboost as xgb

In [2]:
print("=== Facial Recognition Model with Hyperparameter Tuning ===")

=== Facial Recognition Model with Hyperparameter Tuning ===


Step 1: Load and clean the data

In [3]:
try:
    df = pd.read_csv('../image_features.csv')

    # Handle infinities and missing values
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.dropna(inplace=True)
except FileNotFoundError:
    print(" Error: '.../image_features.csv' not found.")
    exit()
except Exception as e:
    print(f" Error loading 'image_features.csv': {e}")
    exit()

Feature and target selection

In [4]:
feature_cols = [col for col in df.columns if col.startswith('bin_')]
if not feature_cols:
    print(" Error: No 'bin_' feature columns found.")
    exit()

X = df[feature_cols]
y = df['member']

if y.nunique() < 2:
    print(" Error: At least two unique member labels are required.")
    exit()

if y.value_counts().min() < 2:
    print(" Warning: Some classes have very few samples. Results may be unreliable.")

Train-test split

In [5]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

print(f" Data Split Complete — Training: {len(X_train)}, Test: {len(X_test)}")

 Data Split Complete — Training: 33, Test: 15


Random Forest with Hyperparameter Tuning 

In [6]:
print("\n Training Random Forest with GridSearchCV...")

param_grid_rf = {
    'n_estimators': [50, 100, 200],
    'max_features': ['sqrt', 'log2'],
    'max_depth': [None, 10, 20],
    'min_samples_leaf': [1, 2]
}

scorer = make_scorer(f1_score, average='weighted', zero_division=0)

grid_rf = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid_rf,
    scoring=scorer,
    cv=min(5, len(X_train) // y_train.nunique()),
    n_jobs=-1,
    verbose=1
)

grid_rf.fit(X_train, y_train)
best_rf = grid_rf.best_estimator_


 Training Random Forest with GridSearchCV...
Fitting 5 folds for each of 36 candidates, totalling 180 fits


 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]


Evaluate

In [7]:
y_pred_rf = best_rf.predict(X_test)
y_proba_rf = best_rf.predict_proba(X_test)

print(" Random Forest Results:")
print("Best Parameters:", grid_rf.best_params_)
print("Test Accuracy:", round(accuracy_score(y_test, y_pred_rf), 4))
print("Test F1 Score:", round(f1_score(y_test, y_pred_rf, average='weighted'), 4))
print("Test Log Loss:", round(log_loss(y_test, y_proba_rf), 4))


 Random Forest Results:
Best Parameters: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'n_estimators': 50}
Test Accuracy: 0.8667
Test F1 Score: 0.8578
Test Log Loss: 0.2693


Logistic Regression

In [8]:
print("\n Training Logistic Regression...")

lr = LogisticRegression(max_iter=1000, solver='liblinear', random_state=42)
lr.fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
y_proba_lr = lr.predict_proba(X_test)

print(" Logistic Regression Results:")
print("Test Accuracy:", round(accuracy_score(y_test, y_pred_lr), 4))
print("Test F1 Score:", round(f1_score(y_test, y_pred_lr, average='weighted'), 4))
print("Test Log Loss:", round(log_loss(y_test, y_proba_lr), 4))



 Training Logistic Regression...
 Logistic Regression Results:
Test Accuracy: 0.8
Test F1 Score: 0.7673
Test Log Loss: 1.7774


XGBoost Classifier

In [9]:
print("\n Training XGBoost Classifier...")

# Encode labels for XGBoost
label_map = {label: idx for idx, label in enumerate(y.unique())}
y_train_enc = y_train.map(label_map)
y_test_enc = y_test.map(label_map)

xgb_model = xgb.XGBClassifier(
    n_estimators=100,
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42
)

xgb_model.fit(X_train, y_train_enc)
y_pred_xgb = xgb_model.predict(X_test)
y_proba_xgb = xgb_model.predict_proba(X_test)

print(" XGBoost Results:")
print("Test Accuracy:", round(accuracy_score(y_test_enc, y_pred_xgb), 4))
print("Test F1 Score:", round(f1_score(y_test_enc, y_pred_xgb, average='weighted'), 4))
print("Test Log Loss:", round(log_loss(y_test_enc, y_proba_xgb), 4))

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



 Training XGBoost Classifier...
 XGBoost Results:
Test Accuracy: 0.7333
Test F1 Score: 0.6978
Test Log Loss: 0.6344
