In [None]:
# importing important libraries and its functionalities
import os
import cv2
import numpy as np
from skimage.io import imread
from skimage.transform import resize
from skimage.feature import hog
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

In [None]:
import os
from zipfile import ZipFile


!pip install kaggle


os.environ['KAGGLE_USERNAME'] = "majisouvik1099"
os.environ['KAGGLE_KEY'] = "9464225218d56f8bfea9dd9cc437489f"

# download the datset needed
!kaggle datasets download -d jessicali9530/lfw-dataset

# extract the files by unzipping
with ZipFile('lfw-dataset.zip', 'r') as zip_ref:
    zip_ref.extractall('lfw-dataset')


os.remove('lfw-dataset.zip')


In [9]:
# defining some of the important functions that are needed 

# defining feature extraction for resnet-50
def extract_features(image_path, model):
    image = Image.open(image_path).convert('RGB')
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    image = preprocess(image)
    image = image.unsqueeze(0)
    with torch.no_grad():
        features = model(image)
    features = features.squeeze(0)
    return features

# hog calculation
def compute_hog(img):
    resized_img = resize(img, (128*4, 64*4))
    fd, hog_image = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
                    cells_per_block=(2, 2), visualize=True,channel_axis=-1)
    return fd


def get_pixel(img, center, x, y):
    new_value = 0
    try:
        if img[x][y] >= center:
            new_value = 1
    except:
        pass
    return new_value

# calculate lbp

def lbp_calculated_pixel(img, x, y):
    center = img[x][y]
    val_ar = []
    val_ar.append(get_pixel(img, center, x-1, y+1))
    val_ar.append(get_pixel(img, center, x, y+1))
    val_ar.append(get_pixel(img, center, x+1, y+1))
    val_ar.append(get_pixel(img, center, x+1, y))
    val_ar.append(get_pixel(img, center, x+1, y-1))
    val_ar.append(get_pixel(img, center, x, y-1))
    val_ar.append(get_pixel(img, center, x-1, y-1))
    val_ar.append(get_pixel(img, center, x-1, y))

    power_val = [1, 2, 4, 8, 16, 32, 64, 128]
    val = 0
    for i in range(len(val_ar)):
        val += val_ar[i] * power_val[i]
    return val

def calcLBP(img):
    height, width, channel = img.shape
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_lbp = np.zeros((height, width,3), np.uint8)
    for i in range(0, height):
        for j in range(0, width):
             img_lbp[i, j] = lbp_calculated_pixel(img_gray, i, j)
    hist_lbp = cv2.calcHist([img_lbp], [0], None, [256], [0, 256])
    return hist_lbp.flatten()


In [7]:
# pretrained resnet-50 model
resnet = models.resnet50(pretrained=True)
resnet = nn.Sequential(*list(resnet.children())[:-1])
resnet.eval()

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [38]:
# extracting the features from the folder
lfw_folder = '/kaggle/input/lfw-dataset/lfw-deepfunneled/lfw-deepfunneled'
X, y = [], []
for folder_name in os.listdir(lfw_folder):
    folder_path = os.path.join(lfw_folder, folder_name)
    if os.path.isdir(folder_path):
        num_images = len(os.listdir(folder_path))
        if num_images > 70: # taking those persons who have atleast 70 images 
            for image_name in os.listdir(folder_path):
                image_path = os.path.join(folder_path, image_name)
                image = imread(image_path)
                hog_feature = compute_hog(image) # hog features
                lbp_feature = calcLBP(image)     # lbp features
                cnn_feature = extract_features(image_path, resnet).numpy() #cnn features
                hog_feature = hog_feature.reshape(-1) 
                lbp_feature = lbp_feature.reshape(-1)
                cnn_feature = cnn_feature.flatten()
                combined_feature = np.concatenate((hog_feature, lbp_feature, cnn_feature)) # combining features as needed

                X.append(combined_feature)
                y.append(folder_name)


In [39]:
# hog features extracted
hog_feature

array([0., 0., 0., ..., 0., 0., 0.])

In [40]:
# printing the shape
np.array(X).shape

(1288, 72612)

In [41]:
# train-test split 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=30)

In [42]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
import numpy as np

# Grid
param_grid = {'C': np.logspace(-3, 3, 7)}

# L1 regularization
lasso_logreg = LogisticRegression(penalty='l1', solver='liblinear', max_iter=10000)

# find the best hyperparameters
grid_search = GridSearchCV(lasso_logreg, param_grid, cv=5)

grid_search.fit(X_train, y_train)

In [43]:
# best model
best_lasso_logreg = grid_search.best_estimator_

# Fit the model
best_lasso_logreg.fit(X_train, y_train)

# prediction
y_pred = best_lasso_logreg.predict(X_test)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
# classification report
report = classification_report(y_test, y_pred)

print("Classification Report:\n", report)

Accuracy: 0.9573643410852714
Classification Report:
                    precision    recall  f1-score   support

     Ariel_Sharon       1.00      0.86      0.92        14
     Colin_Powell       0.93      0.93      0.93        43
  Donald_Rumsfeld       1.00      1.00      1.00        24
    George_W_Bush       0.97      0.98      0.98       124
Gerhard_Schroeder       0.94      0.94      0.94        18
      Hugo_Chavez       0.92      0.92      0.92        13
       Tony_Blair       0.91      0.91      0.91        22

         accuracy                           0.96       258
        macro avg       0.95      0.94      0.94       258
     weighted avg       0.96      0.96      0.96       258



In [44]:
# prediction
y_pred = best_lasso_logreg.predict(X_train)

# Evaluation
accuracy = accuracy_score(y_train, y_pred)
print("Accuracy:", accuracy)
# classification report
report = classification_report(y_train, y_pred)

print("Classification Report:\n", report)

Accuracy: 1.0
Classification Report:
                    precision    recall  f1-score   support

     Ariel_Sharon       1.00      1.00      1.00        63
     Colin_Powell       1.00      1.00      1.00       193
  Donald_Rumsfeld       1.00      1.00      1.00        97
    George_W_Bush       1.00      1.00      1.00       406
Gerhard_Schroeder       1.00      1.00      1.00        91
      Hugo_Chavez       1.00      1.00      1.00        58
       Tony_Blair       1.00      1.00      1.00       122

         accuracy                           1.00      1030
        macro avg       1.00      1.00      1.00      1030
     weighted avg       1.00      1.00      1.00      1030



In [50]:
# Print the best hyperparameters 
print("Best hyperparameters:", grid_search.best_params_)

Best hyperparameters: {'C': 1000.0}


In [51]:
# This is the best model we have found, so we will generate pickle file of this model and demo code using this model
demo = LogisticRegression(C=1000, penalty='l1', solver='liblinear', max_iter=10000)

In [53]:
import pickle

# generating pickle file of the pretrained model
with open('demo.pkl', 'wb') as f:
    pickle.dump(demo, f)
