### Pattern Recognition and Machine Learning

#### Face Recognition

In [3]:
# importing libraries to load and test the models
from sklearn.metrics import accuracy_score,classification_report,ConfusionMatrixDisplay
import joblib

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from skimage.feature import hog
from skimage.transform import resize
from skimage.io import imread
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import cv2
from tensorflow.keras.models import load_model


2024-04-21 15:29:51.924726: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


##### Feature Extraction

After Feature Extraction we found out that HoG, meaning Histogram of Oriented Gradients, we notice that we get around 70,000 features which is quite bulky for models to handle, so we applied PCA on the HoG features alone and extracted 985 features out of those 70,000 which covers 0.95 variance

We are loading the dataset
Dataset was initally broken into two parts train and test. However, we realised that we had to drop many classes due to lack of sufficient images to train. Thus, we concatenated these two datasets together and had carried out training and testing

In [None]:
# Dataset part 01 contains 10,586 records with 3248 features
# Dataset part 02 contains 2647 records with 3248 features
dataset_part_01 = pd.read_csv("",header = None)
dataset_part_02 = pd.read_csv("",header = None)

dataset = pd.concat([dataset_part_01,dataset_part_02],ignore_index=True)

In [None]:
print(dataset.head())

print(dataset.info())

print(dataset.describe())

We may notice that that features are not in the same range, so, we normalized the data as Zero Mean and One Standard Deviation

Now, We load the models trained and tuned

In [27]:
class Face_Recognition:

    def __init__(self):

        # self.image = None
        self.__models = []
        self.__model_weights = {}
        self.scaler=joblib.load('Joblib_files/StandardScaler.joblib')
        self.ann_list=['Hugo_Chavez','George_W_Bush','Donald_Rumsfeld', 'Colin_Powell'
 'Tony_Blair', 'Junichiro_Koizumi', 'Ariel_Sharon' 'Jean_Chretien'
 'Jacques_Chirac', 'Gerhard_Schroeder', 'Serena_Williams', 'John_Ashcroft']

    def __compute_hog_w_pca(self,image):
        img=imread(image)
        resized_img = resize(img, (128, 64))
        fd, hog_image = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
                            cells_per_block=(2, 2), visualize=True, channel_axis=-1)
        pca_hog=joblib.load('Joblib_files/pca_hog_model.joblib')
        fd=fd.reshape(1, -1)
        # print(fd.shape)
        hfe=pca_hog.transform(fd)
        return hfe
    
    def __compute_cnn_feat(self,image):
        image = Image.open(image).convert('RGB')
        preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        image = preprocess(image)
        
        # Add batch dimension
        image = image.unsqueeze(0)
        # Extract features
                    # Load pre-trained ResNet-50 model
        resnet = models.resnet50(weights='ResNet50_Weights.DEFAULT')
        # Remove the last fully connected layer
        resnet = nn.Sequential(*list(resnet.children())[:-1])
        # Set the model to evaluation mode
        resnet.eval()
    
        with torch.no_grad():
            features = resnet(image)
        # Remove the batch dimension
        features = features.squeeze(0)
        features_array = np.array([round(feature.item(), 4) for feature in features])

        return features_array

    def __get_pixel(self,img, center, x, y):
        new_value = 0
        try:
            if img[x][y] >= center:
                new_value = 1
        except:
            pass
        return new_value
    
    def __lbp_calculated_pixel(self,img, x, y):
        center = img[x][y]
        val_ar = []
        val_ar.append(self.__get_pixel(img, center, x-1, y-1))     # top_left
        val_ar.append(self.__get_pixel(img, center, x, y-1))       # top
        val_ar.append(self.__get_pixel(img, center, x+1, y-1))     # top_right
        val_ar.append(self.__get_pixel(img, center, x+1, y))       # right
        val_ar.append(self.__get_pixel(img, center, x+1, y+1))     # bottom_right
        val_ar.append(self.__get_pixel(img, center, x, y+1))       # bottom
        val_ar.append(self.__get_pixel(img, center, x-1, y+1))     # bottom_left
        val_ar.append(self.__get_pixel(img, center, x-1, y))       # left

        power_val = [1, 2, 4, 8, 16, 32, 64, 128] #this depict powers of 2 starting from top_left
        val = 0
        for i in range(len(val_ar)):
            val += val_ar[i] * power_val[i]
        return val

    def __calcLBP(self,img):
        height, width, channel = img.shape
        # print(height,width,channel)
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        img_lbp = np.zeros((height, width,3), np.uint8)
        for i in range(0, height):
            for j in range(0, width):
                img_lbp[i, j] = self.__lbp_calculated_pixel(img_gray, i, j)
        hist_lbp = cv2.calcHist([img_lbp], [0], None, [256], [0, 256])  
        hist_lbp=hist_lbp.flatten()
        return hist_lbp

  
    def __lbp_feat(self,image):
        img=cv2.imread(image)
        features_lbp=self.__calcLBP(img)
        return features_lbp


    def __concatenate_features(self,image):
        hog_features=self.__compute_hog_w_pca(image)
        cnn_features=self.__compute_cnn_feat(image)
        lbp_features=self.__lbp_feat(image)
        hog_features=hog_features.reshape(-1)

        

        
        conc_array = np.concatenate((hog_features, cnn_features, lbp_features))
        conc_array_norm=self.scaler.transform(conc_array.reshape(1,-1))

        return conc_array_norm




    def __loadModels (self):
        # We are loading the hyper parameter tuned models that were saved using joblib

        model_Decision_Tree = joblib.load("Joblib_files/tuned_dt_conc_50_gd.joblib")
        self.__models.append(model_Decision_Tree)
        self.__model_weights[model_Decision_Tree] = 10

        model_Random_Forest = joblib.load("Joblib_files/model_all_Random_Forest.joblib")
        self.__models.append(model_Random_Forest)
        self.__model_weights[model_Random_Forest] = 10


        model_SVM = joblib.load("Joblib_files/LinearSVC_All_n.joblib")
        self.__models.append(model_SVM)
        self.__model_weights[model_SVM] = 10


        model_Naive_Bayes = joblib.load("Joblib_files/gaussian.joblib")
        self.__models.append(model_Naive_Bayes)
        self.__model_weights[model_Naive_Bayes] = 10

        # model_kNN = joblib.load("Joblib_files/all_knn.joblib")
        # self.__models.append(model_kNN)
        # self.__model_weights[model_kNN] = 10

    

        model_ANN = load_model("Decision_tree/saved_models/ann_50.h5")
        self.__models.append(model_ANN)
        self.__model_weights[model_ANN] = 10

       


        

        

    
    def __printModel_Parameters (self):
        # Printing the models with their Hyper Parameters

        print("Decision Tree ",self.__models[0])

        print("Random Forest ",self.__models[1])

        print("Support Vector Machine ",self.__models[2])

        print("Naive Bayes ",self.__models[3])

        # print("kNN ",self.__models[4])

        print("Artificial Neural Network ",self.__models[4])

        

        

        
    

    def predict(self,image):

        ## Call the feature extraction function and store it in the variable X ##

        ## Set up the model weights ##

        ## Predictions are stored here
        y_predictions = []
        X=self.__concatenate_features(image)

        self.__loadModels()
        self.__printModel_Parameters()

        # Receive prediction of each model
        for model in self.__models:

            model_prediction = model.predict(X)

            # Add the prediction of model the weight number of times to get better performance

            for iter in range(self.__model_weights[model]):
                if model == self.__models[-1]:  # Check if the model is the ANN
                    ann_index = int(np.argmax(model_prediction))  # Assuming model_prediction is the index
                    print(ann_index)
                    print(self.ann_list)
                    ann_prediction = self.ann_list[ann_index]
                    y_predictions.append(ann_prediction)
                else:
                    y_predictions.append(model_prediction)
        

        # Get the count that has max frequency and return that prediction
        max_count = 0
        predicted_class = ''

        for element in y_predictions:

            count = y_predictions.count(element)
            if y_predictions.count(element) > max_count:
                max_count = count
                predicted_class = element
            
        return predicted_class
    
    def __kl_divergence(self,p, q):
        # Clip values to avoid log(0) and log(1)
        p = np.clip(p, 1e-10, 1 - 1e-10)
        q = np.clip(q, 1e-10, 1 - 1e-10)
        
        # Compute KL divergence
        kl_div = np.sum(p * np.log(p / q))
        
        return kl_div
    
    def get_similarity (self,image1,image2,model):

        ### Write the code here ###
        test1=self.__concatenate_features(image1)
        test2=self.__concatenate_features(image2)

        

        arr_1= test1.reshape(1, -1)
        
        arr_2= test2.reshape(1, -1)

        y1=model.predict(arr_1)
        y2=model.predict(arr_2)

        # Compute the argmax for both predictions
        argmax1 = np.argmax(y1)
        argmax2 = np.argmax(y2)
        
        # getting the label
        # y_pred = label_encoder.inverse_transform(np.argmax(y1, axis=1))
        # print(y_pred)
        print(argmax1)
        # print(y1.shape)
        # print(y2.shape)

        # If the argmax values are the same
        if argmax1 == argmax2:
            # Compute the mean of the probabilities for each class
            mean_probs = (y1 + y2) / 2
            return np.max(mean_probs), "Same"
    
        # If the argmax values are different
        else:
            kl_div = self.__kl_divergence(y1.flatten(), y2.flatten())

            return kl_div, "Different"



In [28]:
faceRecognition = Face_Recognition()
img1='image4.jpg'
img2='image2.jpg'

faceRecognition.predict(img1)



# model=load_model('Decision_tree/saved_models/ann_50.h5')
# faceRecognition.get_similarity(img1,img2,model)    

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Decision Tree  DecisionTreeClassifier(max_depth=8, max_leaf_nodes=40, min_samples_leaf=10,
                       min_samples_split=60, random_state=42)
Random Forest  RandomForestClassifier(max_depth=2500, max_leaf_nodes=500, min_samples_leaf=2,
                       n_estimators=95)
Support Vector Machine  LinearSVC(dual=False)
Naive Bayes  GaussianNB()
Artificial Neural Network  <keras.src.engine.sequential.Sequential object at 0x761ab43b0d30>
3
['Hugo_Chavez', 'George_W_Bush', 'Donald_Rumsfeld', 'Colin_PowellTony_Blair', 'Junichiro_Koizumi', 'Ariel_SharonJean_ChretienJacques_Chirac', 'Gerhard_Schroeder', 'Serena_Williams', 'John_Ashcroft']
3
['Hugo_Chavez', 'George_W_Bush', 'Donald_Rumsfeld', 'Colin_PowellTony_Blair', 'Junichiro_Koizumi', 'Ariel_SharonJean_ChretienJacques_Chirac', 'Gerhard_Schroeder', 'Serena_Williams', 'John_Ashcroft']
3
['Hugo_Chavez', 'George_W_Bush', 'Donald_Rumsfeld', 'Colin_PowellTony_Blair', 'Junichiro_Koizumi', 'Ariel_SharonJean_ChretienJacques_Chirac', 'G

array(['George_W_Bush'], dtype=object)