In [1]:
!git clone https://github.com/sinhajiya/Face_Recognition_from_Features_using_Yale_Face_Database.git

Cloning into 'Face_Recognition_from_Features_using_Yale_Face_Database'...
remote: Enumerating objects: 1284, done.[K
remote: Counting objects: 100% (368/368), done.[K
remote: Compressing objects: 100% (265/265), done.[K
remote: Total 1284 (delta 120), reused 335 (delta 102), pack-reused 916 (from 1)[K
Receiving objects: 100% (1284/1284), 60.91 MiB | 8.87 MiB/s, done.
Resolving deltas: 100% (584/584), done.
Updating files: 100% (1168/1168), done.


In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [None]:
# Paths to each subfolder
base_folder = '/content/Face_Recognition_from_Features_using_Yale_Face_Database'
log_path = os.path.join(base_folder, 'LoG_transformed')
gabor_path = os.path.join(base_folder, 'gabor_features')
glcm_path = os.path.join(base_folder, 'glcm_features')
lbp_path = os.path.join(base_folder, 'lbp_histogram')

# Initialize lists to store combined features and labels
combined_features = []
labels = []

# Define placeholder sizes for missing features if necessary
log_placeholder = np.zeros((50,))       # Replace 50 with actual feature size if known
gabor_placeholder = np.zeros((80,))     # Replace 80 with actual feature size if known
glcm_placeholder = np.zeros((30,))      # Replace 30 with actual feature size if known
lbp_placeholder = np.zeros((60,))       # Replace 60 with actual feature size if known

# Load and combine features based on filenames
for filename in os.listdir(log_path):
    # Extract base name and label
    base_name = filename.replace('.npy', '')
    label = base_name.split('.')[0]  # Assuming "subject01" is the label

    # Load and flatten LoG features
    log_features = np.load(os.path.join(log_path, filename)).flatten() if os.path.exists(os.path.join(log_path, filename)) else log_placeholder

    # Load and flatten Gabor features with updated filename pattern
    gabor_filename = f"{base_name}.npy_gabor.npy"
    gabor_features = np.load(os.path.join(gabor_path, gabor_filename)).flatten() if os.path.exists(os.path.join(gabor_path, gabor_filename)) else gabor_placeholder

    # Load and flatten GLCM features from CSV
    glcm_filename = f"{base_name}_features.csv"
    if os.path.exists(os.path.join(glcm_path, glcm_filename)):
        glcm_df = pd.read_csv(os.path.join(glcm_path, glcm_filename))
        glcm_features = glcm_df.values.flatten()
    else:
        glcm_features = glcm_placeholder

    # Load and flatten LBP features
    lbp_filename = f"{base_name}.npy_lbp.npy"
    lbp_features = np.load(os.path.join(lbp_path, lbp_filename)).flatten() if os.path.exists(os.path.join(lbp_path, lbp_filename)) else lbp_placeholder

    # Concatenate all features for this image
    combined_feature_vector = np.concatenate((log_features, gabor_features, glcm_features, lbp_features))
    combined_features.append(combined_feature_vector)
    labels.append(label)

# Convert lists to arrays
combined_features = np.array(combined_features)
labels = np.array(labels)

In [None]:
combined_features.shape

(165, 155544)

In [None]:
!ls /content/Face_Recognition_from_Features_using_Yale_Face_Database/data | wc -l

165


In [None]:
print(f"The dimension of feature vector : {combined_features.shape}")
print(f"The number of features from LBP:{lbp_features.shape}")
print(f"The number of features from GLCM:{glcm_features.shape}")
print(f"The number of features from Gabor:{gabor_features.shape}")
print(f"The number of features from LoG:{log_features.shape}")
print(f"The total number of features: {lbp_features.shape[0]+glcm_features.shape[0]+gabor_features.shape[0]+log_features.shape[0]}")

The dimension of feature vector : (165, 155544)
The number of features from LBP:(77760,)
The number of features from GLCM:(6,)
The number of features from Gabor:(18,)
The number of features from LoG:(77760,)
The total number of features: 155544


In [None]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(combined_features, labels, test_size=0.3, random_state=42)

### STANDARIZATION

In [None]:
# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

### APPLYING PCA

In [None]:
# Apply PCA to reduce dimensions
pca = PCA(n_components=0.95)  # Retain 95% variance
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

In [None]:
y_test_df = pd.DataFrame(y_test)

# Get unique values from the entire DataFrame
pd.unique(y_test_df.values.ravel())

array(['subject13', 'subject08', 'subject14', 'subject06', 'subject04',
       'subject12', 'subject01', 'subject15', 'subject03', 'subject09',
       'subject10', 'subject07', 'subject05', 'subject02', 'subject11'],
      dtype=object)

In [None]:
print(f"The dimension of feature vector before transformation: {X_train.shape}")
print(f"The dimension of feature vector after transformation: {X_train_pca.shape}")
print(f"The number of features dropped by PCA: {(X_train.shape)[1]-(X_train_pca.shape)[1]}")

The dimension of feature vector before transformation: (115, 155544)
The dimension of feature vector after transformation: (115, 93)
The number of features dropped by PCA: 155451


## APPLYING SVC

In [None]:
# Define parameter grid for SVC
param_grid_svc = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.1, 1],
    'kernel': ['linear', 'rbf', 'poly'],
}

#### SVC USING THE DIMENSIONAL REDUCTION

In [None]:
# Initialize SVC
svc = SVC()

# Perform grid search with cross-validation
grid_search = GridSearchCV(svc, param_grid_svc, cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X_train_pca, y_train)


# Print best parameters found
print(f"Best parameters: {grid_search.best_params_}")

# Use the best estimator found by grid search
best_svc = grid_search.best_estimator_

# Predict on the test set
y_pred = best_svc.predict(X_test_pca)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Optimized classification accuracy after PCA for SVC: {accuracy * 100:.2f}%")

Fitting 5 folds for each of 48 candidates, totalling 240 fits
Best parameters: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear'}
Optimized classification accuracy after PCA for SVC: 70.00%


#### SVC ON ORIGINAL FEATURE VECTOR

In [None]:
# Train classifier
classifier = SVC()
classifier.fit(X_train, y_train)

# Test and evaluate
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Classification accuracy: {accuracy * 100:.2f}%")

In [None]:
# Initialize SVC
svc = SVC()

# Perform grid search with cross-validation
grid_search = GridSearchCV(svc, param_grid_svc, cv=5, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

# Print best parameters found
print(f"Best parameters: {grid_search.best_params_}")

# Use the best estimator found by grid search
best_svc = grid_search.best_estimator_

# Predict on the test set
y_pred = best_svc.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Optimized classification accuracy: {accuracy * 100:.2f}%")

Fitting 5 folds for each of 48 candidates, totalling 240 fits


KeyboardInterrupt: 

## RANDOM FOREST

#### RANDOM FOREST WITHOUT DIMENSION REDUCTION

In [None]:
# Apply Random Forest Classifier
classifier = RandomForestClassifier(n_estimators=100, random_state=42)  # You can adjust the number of estimators
classifier.fit(X_train, y_train)

# Test and evaluate
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Classification accuracy with Random Forest: {accuracy * 100:.2f}%")

Classification accuracy with Random Forest: 84.00%


In [None]:
param_grid_RF = {
    'criterion': ['gini', 'entropy', 'log_loss'],
    'n_estimators': [100, 150, 200],          # Number of trees in the forest
    'max_depth': [None, 10, 20, 30],              # Maximum depth of each tree
    'min_samples_split': [2, 5, 10],              # Minimum samples required to split an internal node
    'min_samples_leaf': [1, 2, 4],                # Minimum samples required at each leaf node
    'bootstrap': [True, False]                 # Whether to use bootstrap samples
}

In [None]:
# Initialize the Random Forest classifier
rf_classifier = RandomForestClassifier(random_state=42)

# Initialize GridSearchCV with cross-validation
grid_search = GridSearchCV(estimator=rf_classifier, param_grid=param_grid_RF,
                           cv=5, n_jobs=-1, verbose=2, scoring='accuracy')

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

# Get the best estimator and evaluate it
best_rf_classifier = grid_search.best_estimator_

# Predict on the test set
y_pred = best_rf_classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)

print("Best Parameters:", grid_search.best_params_)
print(f"Classification accuracy with the best Random Forest: {accuracy * 100:.2f}%")

Fitting 5 folds for each of 288 candidates, totalling 1440 fits
Best Parameters: {'bootstrap': False, 'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
Classification accuracy with the best Random Forest: 80.00%


### RANDOM FOREST ON REDUCED DATA

In [None]:
# Initialize the Random Forest classifier
rf_classifier = RandomForestClassifier(random_state=42)

# Initialize GridSearchCV with cross-validation
grid_search = GridSearchCV(estimator=rf_classifier, param_grid=param_grid_RF,
                           cv=5, n_jobs=-1, verbose=2, scoring='accuracy')

# Fit the grid search to the data
grid_search.fit(X_train_pca, y_train)

# Get the best estimator and evaluate it
best_rf_classifier = grid_search.best_estimator_

# Predict on the test set
y_pred = best_rf_classifier.predict(X_test_pca)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)

print("Best Parameters:", grid_search.best_params_)
print(f"Classification accuracy with the best Random Forest: {accuracy * 100:.2f}%")

Fitting 5 folds for each of 72 candidates, totalling 360 fits


KeyboardInterrupt: 

In [None]:
# Apply Random Forest Classifier on reduced data
classifier = RandomForestClassifier(n_estimators=100, random_state=42)  # You can adjust the number of estimators
classifier.fit(X_train_pca, y_train)

# Test and evaluate
y_pred = classifier.predict(X_test_pca)
accuracy = accuracy_score(y_test, y_pred)
print(f"Classification accuracy with Random Forest: {accuracy * 100:.2f}%")


Classification accuracy with Random Forest: 70.00%


## *LDA*

In [None]:
lda = LinearDiscriminantAnalysis()

# Fit the model on the training data
lda.fit(X_train, y_train)

# Make predictions on the test data
y_pred = lda.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Classification accuracy with LDA: {accuracy * 100:.2f}%")


Classification accuracy with LDA: 70.00%
