In [8]:
import os
import cv2
import numpy as np
from skimage.io import imread
from skimage.transform import resize
from skimage.feature import hog
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

In [9]:
import os
from zipfile import ZipFile


!pip install kaggle


os.environ['KAGGLE_USERNAME'] = "majisouvik1099"
os.environ['KAGGLE_KEY'] = "9464225218d56f8bfea9dd9cc437489f"


!kaggle datasets download -d jessicali9530/lfw-dataset


with ZipFile('lfw-dataset.zip', 'r') as zip_ref:
    zip_ref.extractall('lfw-dataset')


os.remove('lfw-dataset.zip')


[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Dataset URL: https://www.kaggle.com/datasets/jessicali9530/lfw-dataset
License(s): other
lfw-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [10]:

def extract_features(image_path, model):
    image = Image.open(image_path).convert('RGB')
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    image = preprocess(image)
    image = image.unsqueeze(0)
    with torch.no_grad():
        features = model(image)
    features = features.squeeze(0)
    return features


def compute_hog(img):
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    resized_img = cv2.resize(gray_img, (128*4, 64*4))
    resized_img_flat = resized_img.flatten()
    resized_img_2d = resized_img_flat.reshape((64*4, 128*4))  # Reshape to 2D
    fd, hog_image = hog(resized_img_2d, orientations=9, pixels_per_cell=(8, 8),
                         cells_per_block=(2, 2), visualize=True)
    return fd



def get_pixel(img, center, x, y):
    new_value = 0
    try:
        if img[x][y] >= center:
            new_value = 1
    except:
        pass
    return new_value

def lbp_calculated_pixel(img, x, y):
    center = img[x][y]
    val_ar = []
    val_ar.append(get_pixel(img, center, x-1, y+1))
    val_ar.append(get_pixel(img, center, x, y+1))
    val_ar.append(get_pixel(img, center, x+1, y+1))
    val_ar.append(get_pixel(img, center, x+1, y))
    val_ar.append(get_pixel(img, center, x+1, y-1))
    val_ar.append(get_pixel(img, center, x, y-1))
    val_ar.append(get_pixel(img, center, x-1, y-1))
    val_ar.append(get_pixel(img, center, x-1, y))

    power_val = [1, 2, 4, 8, 16, 32, 64, 128]
    val = 0
    for i in range(len(val_ar)):
        val += val_ar[i] * power_val[i]
    return val

def calcLBP(img):
    height, width, channel = img.shape
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_lbp = np.zeros((height, width,3), np.uint8)
    for i in range(0, height):
        for j in range(0, width):
             img_lbp[i, j] = lbp_calculated_pixel(img_gray, i, j)
    hist_lbp = cv2.calcHist([img_lbp], [0], None, [256], [0, 256])
    return hist_lbp.flatten()


In [11]:
resnet = models.resnet50(pretrained=True)
resnet = nn.Sequential(*list(resnet.children())[:-1])
resnet.eval()

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [12]:
lfw_folder = '/kaggle/input/lfw-dataset/lfw-deepfunneled/lfw-deepfunneled'
X, y = [], []
for folder_name in os.listdir(lfw_folder):
    folder_path = os.path.join(lfw_folder, folder_name)
    if os.path.isdir(folder_path):
        num_images = len(os.listdir(folder_path))
        if num_images > 70:
            for image_name in os.listdir(folder_path):
                image_path = os.path.join(folder_path, image_name)
                image = imread(image_path)
#                 hog_feature = compute_hog(image)
#                 lbp_feature = calcLBP(image)
                cnn_feature = extract_features(image_path, resnet).numpy()
#                 hog_feature = hog_feature.reshape(-1)
#                 lbp_feature = lbp_feature.reshape(-1)
                cnn_feature = cnn_feature.flatten()
#                 combined_feature = np.concatenate((hog_feature, lbp_feature, cnn_feature))

                X.append(cnn_feature)
                y.append(folder_name)


In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=30)

In [14]:
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [18]:
!pip install --upgrade pip

Collecting pip
  Downloading pip-24.0-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.0.1
    Uninstalling pip-23.0.1:
      Successfully uninstalled pip-23.0.1
Successfully installed pip-24.0
[0m

In [21]:
pip install -U libsvm-official

Collecting libsvm-official
  Downloading libsvm-official-3.32.0.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: libsvm-official
  Building wheel for libsvm-official (setup.py) ... [?25ldone
[?25h  Created wheel for libsvm-official: filename=libsvm_official-3.32.0-cp310-cp310-linux_x86_64.whl size=132551 sha256=13bfb02f7e02493e1633fc6295536cab389441069c101be12bf312c0a4a78e0a
  Stored in directory: /root/.cache/pip/wheels/61/3b/1b/73bb4869517f96a26c82b47ccdb9ec48f12f4466de2371eff6
Successfully built libsvm-official
Installing collected packages: libsvm-official
Successfully installed libsvm-official-3.32.0
[0mNote: you may need to restart the kernel to use updated packages.


In [28]:
from libsvm import svmutil

# Convert data to libsvm format
prob = svmutil.svm_problem(y_train_encoded, X_train)

# Define parameters for the grid search
parameters = [
    {"type": svmutil.C_SVC, "kernel": svmutil.LINEAR, "C": [0.0001, 0.001, 0.1, 1, 10, 100, 1000]},
    {"type": svmutil.C_SVC, "kernel": svmutil.POLY, "degree": [2, 3, 4], "C": [0.0001, 0.001, 0.1, 1, 10, 100, 1000]},
    {"type": svmutil.C_SVC, "kernel": svmutil.RBF, "gamma": [0.0001, 0.001, 0.1, 1, 10, 100, 1000], "C": [0.0001, 0.001, 0.1, 1, 10, 100, 1000]}
]

# Tune hyperparameters via grid search
print("Tuning hyperparameters via grid search")
best_accuracy = 0
best_params = None
for param_set in parameters:
    for C in param_set["C"]:
        param_grid = svmutil.svm_parameter()
        param_grid.svm_type = param_set["type"]
        param_grid.kernel_type = param_set["kernel"]
        param_grid.C = C
        if "degree" in param_set:
            for degree in param_set["degree"]:
                param_grid.degree = degree
                if "gamma" in param_set:
                    param_grid.gamma = param_set["gamma"]
                m = svmutil.svm_train(prob, param_grid)
                p_label, p_acc, p_val = svmutil.svm_predict(y_test_encoded, X_test, m)
                accuracy = p_acc[0]
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_params = param_grid
print(f"Grid search best accuracy: {best_accuracy:.2f}%")
print(f"Grid search best parameters: {best_params}")

# Train final model with best parameters
final_model = svmutil.svm_train(prob, best_params)
y_pred_svm, _, _ = svmutil.svm_predict(y_test_encoded, X_test, final_model)

# Print classification report
print("SVM Classifier")
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_svm, target_names=label_encoder.classes_))


Tuning hyperparameters via grid search
*
optimization finished, #iter = 91
nu = 0.366197
obj = -0.018200, rho = -1.000003
nSV = 182, nBSV = 182
*
optimization finished, #iter = 63
nu = 0.268657
obj = -0.012600, rho = -1.000002
nSV = 126, nBSV = 126
*
optimization finished, #iter = 58
nu = 0.250000
obj = -0.011600, rho = -0.999992
nSV = 116, nBSV = 116
*
optimization finished, #iter = 122
nu = 0.462121
obj = -0.024400, rho = -0.999998
nSV = 244, nBSV = 244
*
optimization finished, #iter = 97
nu = 0.385686
obj = -0.019400, rho = -1.000003
nSV = 194, nBSV = 194
*
optimization finished, #iter = 193
nu = 0.644407
obj = -0.038600, rho = -0.999992
nSV = 386, nBSV = 386
*
optimization finished, #iter = 63
nu = 0.818182
obj = -0.012600, rho = -1.000004
nSV = 126, nBSV = 126
*
optimization finished, #iter = 58
nu = 0.778523
obj = -0.011600, rho = -0.999952
nSV = 116, nBSV = 116
*
optimization finished, #iter = 91
nu = 0.854460
obj = -0.018200, rho = 1.000002
nSV = 182, nBSV = 182
*
optimization 

In [29]:
from sklearn.decomposition import PCA
pca = PCA(n_components=0.95)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

In [30]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
lda = LDA(n_components=None)
X_train_lda = lda.fit_transform(X_train, y_train_encoded)
X_test_lda = lda.transform(X_test)


In [31]:
from libsvm import svmutil

# Convert data to libsvm format
prob = svmutil.svm_problem(y_train_encoded, X_train_pca)

# Define parameters for the grid search
parameters = [
    {"type": svmutil.C_SVC, "kernel": svmutil.LINEAR, "C": [0.0001, 0.001, 0.1, 1, 10, 100, 1000]},
    {"type": svmutil.C_SVC, "kernel": svmutil.POLY, "degree": [2, 3, 4], "C": [0.0001, 0.001, 0.1, 1, 10, 100, 1000]},
    {"type": svmutil.C_SVC, "kernel": svmutil.RBF, "gamma": [0.0001, 0.001, 0.1, 1, 10, 100, 1000], "C": [0.0001, 0.001, 0.1, 1, 10, 100, 1000]}
]

# Tune hyperparameters via grid search
print("Tuning hyperparameters via grid search")
best_accuracy = 0
best_params = None
for param_set in parameters:
    for C in param_set["C"]:
        param_grid = svmutil.svm_parameter()
        param_grid.svm_type = param_set["type"]
        param_grid.kernel_type = param_set["kernel"]
        param_grid.C = C
        if "degree" in param_set:
            for degree in param_set["degree"]:
                param_grid.degree = degree
                if "gamma" in param_set:
                    param_grid.gamma = param_set["gamma"]
                m = svmutil.svm_train(prob, param_grid)
                p_label, p_acc, p_val = svmutil.svm_predict(y_test_encoded, X_test_pca, m)
                accuracy = p_acc[0]
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_params = param_grid
print(f"Grid search best accuracy: {best_accuracy:.2f}%")
print(f"Grid search best parameters: {best_params}")

# Train final model with best parameters
final_model = svmutil.svm_train(prob, best_params)
y_pred_svm, _, _ = svmutil.svm_predict(y_test_encoded, X_test_pca, final_model)

# Print classification report
print("SVM Classifier")
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_svm, target_names=label_encoder.classes_))


Tuning hyperparameters via grid search
*
optimization finished, #iter = 91
nu = 0.366197
obj = -0.018200, rho = -0.999885
nSV = 182, nBSV = 182
*
optimization finished, #iter = 63
nu = 0.268657
obj = -0.012600, rho = -0.999944
nSV = 126, nBSV = 126
*
optimization finished, #iter = 58
nu = 0.250000
obj = -0.011600, rho = -0.999986
nSV = 116, nBSV = 116
*
optimization finished, #iter = 122
nu = 0.462121
obj = -0.024400, rho = -0.999953
nSV = 244, nBSV = 244
*
optimization finished, #iter = 97
nu = 0.385686
obj = -0.019400, rho = -0.999951
nSV = 194, nBSV = 194
*
optimization finished, #iter = 193
nu = 0.644407
obj = -0.038600, rho = -0.999857
nSV = 386, nBSV = 386
*
optimization finished, #iter = 63
nu = 0.818182
obj = -0.012600, rho = -0.999843
nSV = 126, nBSV = 126
*
optimization finished, #iter = 58
nu = 0.778523
obj = -0.011599, rho = -0.999949
nSV = 116, nBSV = 116
*
optimization finished, #iter = 91
nu = 0.854460
obj = -0.018200, rho = 0.999996
nSV = 182, nBSV = 182
*
optimization 

In [32]:
from libsvm import svmutil

# Convert data to libsvm format
prob = svmutil.svm_problem(y_train_encoded, X_train_lda)

# Define parameters for the grid search
parameters = [
    {"type": svmutil.C_SVC, "kernel": svmutil.LINEAR, "C": [0.0001, 0.001, 0.1, 1, 10, 100, 1000]},
    {"type": svmutil.C_SVC, "kernel": svmutil.POLY, "degree": [2, 3, 4], "C": [0.0001, 0.001, 0.1, 1, 10, 100, 1000]},
    {"type": svmutil.C_SVC, "kernel": svmutil.RBF, "gamma": [0.0001, 0.001, 0.1, 1, 10, 100, 1000], "C": [0.0001, 0.001, 0.1, 1, 10, 100, 1000]}
]

# Tune hyperparameters via grid search
print("Tuning hyperparameters via grid search")
best_accuracy = 0
best_params = None
for param_set in parameters:
    for C in param_set["C"]:
        param_grid = svmutil.svm_parameter()
        param_grid.svm_type = param_set["type"]
        param_grid.kernel_type = param_set["kernel"]
        param_grid.C = C
        if "degree" in param_set:
            for degree in param_set["degree"]:
                param_grid.degree = degree
                if "gamma" in param_set:
                    param_grid.gamma = param_set["gamma"]
                m = svmutil.svm_train(prob, param_grid)
                p_label, p_acc, p_val = svmutil.svm_predict(y_test_encoded, X_test_lda, m)
                accuracy = p_acc[0]
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_params = param_grid
print(f"Grid search best accuracy: {best_accuracy:.2f}%")
print(f"Grid search best parameters: {best_params}")

# Train final model with best parameters
final_model = svmutil.svm_train(prob, best_params)
y_pred_svm, _, _ = svmutil.svm_predict(y_test_encoded, X_test_lda, final_model)

# Print classification report
print("SVM Classifier")
print("Classification Report:")
print(classification_report(y_test_encoded, y_pred_svm, target_names=label_encoder.classes_))


Tuning hyperparameters via grid search
*
optimization finished, #iter = 91
nu = 0.366197
obj = -0.015345, rho = -0.995229
nSV = 182, nBSV = 182
*
optimization finished, #iter = 34
nu = 0.137970
obj = -0.003836, rho = -1.065192
nSV = 66, nBSV = 64
*
optimization finished, #iter = 39
nu = 0.154983
obj = -0.004275, rho = -1.044314
nSV = 73, nBSV = 70
*
optimization finished, #iter = 122
nu = 0.462121
obj = -0.019528, rho = -0.974530
nSV = 244, nBSV = 244
*
optimization finished, #iter = 97
nu = 0.381710
obj = -0.014218, rho = -0.994141
nSV = 192, nBSV = 192
*
optimization finished, #iter = 193
nu = 0.644407
obj = -0.032129, rho = -0.982946
nSV = 386, nBSV = 386
*
optimization finished, #iter = 31
nu = 0.382794
obj = -0.003431, rho = -0.888837
nSV = 60, nBSV = 58
*
optimization finished, #iter = 36
nu = 0.436031
obj = -0.003828, rho = -0.867106
nSV = 66, nBSV = 63
*
optimization finished, #iter = 92
nu = 0.854460
obj = -0.012958, rho = 0.379310
nSV = 182, nBSV = 182
*
optimization finished