In [None]:
import os
import cv2
import numpy as np
from skimage.io import imread
from skimage.transform import resize
from skimage.feature import hog
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

In [None]:
import os
from zipfile import ZipFile


!pip install kaggle


os.environ['KAGGLE_USERNAME'] = "majisouvik1099"
os.environ['KAGGLE_KEY'] = "9464225218d56f8bfea9dd9cc437489f"


!kaggle datasets download -d jessicali9530/lfw-dataset


with ZipFile('lfw-dataset.zip', 'r') as zip_ref:
    zip_ref.extractall('lfw-dataset')


os.remove('lfw-dataset.zip')


In [9]:

def extract_features(image_path, model):
    image = Image.open(image_path).convert('RGB')
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    image = preprocess(image)
    image = image.unsqueeze(0)
    with torch.no_grad():
        features = model(image)
    features = features.squeeze(0)
    return features


def compute_hog(img):
    resized_img = resize(img, (128*4, 64*4))
    fd, hog_image = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
                    cells_per_block=(2, 2), visualize=True,channel_axis=-1)
    return fd


def get_pixel(img, center, x, y):
    new_value = 0
    try:
        if img[x][y] >= center:
            new_value = 1
    except:
        pass
    return new_value

def lbp_calculated_pixel(img, x, y):
    center = img[x][y]
    val_ar = []
    val_ar.append(get_pixel(img, center, x-1, y+1))
    val_ar.append(get_pixel(img, center, x, y+1))
    val_ar.append(get_pixel(img, center, x+1, y+1))
    val_ar.append(get_pixel(img, center, x+1, y))
    val_ar.append(get_pixel(img, center, x+1, y-1))
    val_ar.append(get_pixel(img, center, x, y-1))
    val_ar.append(get_pixel(img, center, x-1, y-1))
    val_ar.append(get_pixel(img, center, x-1, y))

    power_val = [1, 2, 4, 8, 16, 32, 64, 128]
    val = 0
    for i in range(len(val_ar)):
        val += val_ar[i] * power_val[i]
    return val

def calcLBP(img):
    height, width, channel = img.shape
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_lbp = np.zeros((height, width,3), np.uint8)
    for i in range(0, height):
        for j in range(0, width):
             img_lbp[i, j] = lbp_calculated_pixel(img_gray, i, j)
    hist_lbp = cv2.calcHist([img_lbp], [0], None, [256], [0, 256])
    return hist_lbp.flatten()


In [7]:
resnet = models.resnet50(pretrained=True)
resnet = nn.Sequential(*list(resnet.children())[:-1])
resnet.eval()

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [10]:
lfw_folder = '/kaggle/input/lfw-dataset/lfw-deepfunneled/lfw-deepfunneled'
X, y = [], []
for folder_name in os.listdir(lfw_folder):
    folder_path = os.path.join(lfw_folder, folder_name)
    if os.path.isdir(folder_path):
        num_images = len(os.listdir(folder_path))
        if num_images > 70:
            for image_name in os.listdir(folder_path):
                image_path = os.path.join(folder_path, image_name)
                image = imread(image_path)
                hog_feature = compute_hog(image)
                # lbp_feature = calcLBP(image)
                # cnn_feature = extract_features(image_path, resnet).numpy()
                hog_feature = hog_feature.reshape(-1)
                # lbp_feature = lbp_feature.reshape(-1)
                # cnn_feature = cnn_feature.flatten()
                # combined_feature = np.concatenate((hog_feature, lbp_feature, cnn_feature))

                X.append(hog_feature)
                y.append(folder_name)


In [11]:
hog_feature

array([0., 0., 0., ..., 0., 0., 0.])

In [12]:
np.array(X).shape

(1288, 70308)

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=30)

In [29]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
import numpy as np

# Grid
param_grid = {'C': np.logspace(-3, 3, 7)}

# L1 regularization
lasso_logreg = LogisticRegression(penalty='l1', solver='liblinear', max_iter=10000)

# find best hyperparameters
grid_search = GridSearchCV(lasso_logreg, param_grid, cv=5)

grid_search.fit(X_train, y_train)

In [36]:
# best model
best_lasso_logreg = grid_search.best_estimator_

# Fit the model
best_lasso_logreg.fit(X_train, y_train)

# prediction
y_pred = best_lasso_logreg.predict(X_test)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
# classification report
report = classification_report(y_test, y_pred)

print("Classification Report:\n", report)

Accuracy: 0.9496124031007752
Classification Report:
                    precision    recall  f1-score   support

     Ariel_Sharon       1.00      0.79      0.88        14
     Colin_Powell       0.91      0.98      0.94        43
  Donald_Rumsfeld       0.92      0.96      0.94        24
    George_W_Bush       0.95      0.98      0.97       124
Gerhard_Schroeder       1.00      0.89      0.94        18
      Hugo_Chavez       1.00      0.85      0.92        13
       Tony_Blair       0.95      0.91      0.93        22

         accuracy                           0.95       258
        macro avg       0.96      0.91      0.93       258
     weighted avg       0.95      0.95      0.95       258



In [37]:
# prediction
y_pred = best_lasso_logreg.predict(X_train)

# Evaluation
accuracy = accuracy_score(y_train, y_pred)
print("Accuracy:", accuracy)
# classification report
report = classification_report(y_train, y_pred)

print("Classification Report:\n", report)

Accuracy: 1.0
Classification Report:
                    precision    recall  f1-score   support

     Ariel_Sharon       1.00      1.00      1.00        63
     Colin_Powell       1.00      1.00      1.00       193
  Donald_Rumsfeld       1.00      1.00      1.00        97
    George_W_Bush       1.00      1.00      1.00       406
Gerhard_Schroeder       1.00      1.00      1.00        91
      Hugo_Chavez       1.00      1.00      1.00        58
       Tony_Blair       1.00      1.00      1.00       122

         accuracy                           1.00      1030
        macro avg       1.00      1.00      1.00      1030
     weighted avg       1.00      1.00      1.00      1030



In [31]:
from sklearn.decomposition import PCA
pca = PCA(n_components=0.95)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)


In [32]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=None)
X_train_lda = lda.fit_transform(X_train, y_train_encoded)
X_test_lda = lda.transform(X_test)

In [33]:
# best model
best_lasso_logreg = grid_search.best_estimator_

# Fit the model
best_lasso_logreg.fit(X_train_pca, y_train)

# Mprediction
y_pred = best_lasso_logreg.predict(X_test_pca)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
# classification report
report = classification_report(y_test, y_pred)

print("Classification Report:\n", report)

Accuracy: 0.9224806201550387
Classification Report:
                    precision    recall  f1-score   support

     Ariel_Sharon       1.00      0.86      0.92        14
     Colin_Powell       0.87      0.95      0.91        43
  Donald_Rumsfeld       1.00      0.88      0.93        24
    George_W_Bush       0.92      0.99      0.95       124
Gerhard_Schroeder       1.00      0.89      0.94        18
      Hugo_Chavez       1.00      0.46      0.63        13
       Tony_Blair       0.86      0.86      0.86        22

         accuracy                           0.92       258
        macro avg       0.95      0.84      0.88       258
     weighted avg       0.93      0.92      0.92       258



In [34]:
# best model
best_lasso_logreg = grid_search.best_estimator_

# Fit the model
best_lasso_logreg.fit(X_train_lda, y_train)

# Mprediction
y_pred = best_lasso_logreg.predict(X_test_lda)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
# classification report
report = classification_report(y_test, y_pred)

print("Classification Report:\n", report)

Accuracy: 0.8837209302325582
Classification Report:
                    precision    recall  f1-score   support

     Ariel_Sharon       1.00      0.57      0.73        14
     Colin_Powell       0.87      0.95      0.91        43
  Donald_Rumsfeld       0.95      0.83      0.89        24
    George_W_Bush       0.86      0.99      0.92       124
Gerhard_Schroeder       1.00      0.72      0.84        18
      Hugo_Chavez       1.00      0.23      0.38        13
       Tony_Blair       0.87      0.91      0.89        22

         accuracy                           0.88       258
        macro avg       0.94      0.74      0.79       258
     weighted avg       0.90      0.88      0.87       258

