[Description]: in the notebook facerecognization is testing with base classifiers using landmarks detected from dlib


In [1]:

# Define the CNN architecture
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import lightning as L

from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers.tensorboard import TensorBoardLogger
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
import torch
from facenet_pytorch import MTCNN,InceptionResnetV1
from facenet_pytorch.models.inception_resnet_v1 import BasicConv2d
from torchvision import models, transforms
import torch.nn as nn
import cv2
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
device = 'cuda' if torch.cuda.is_available() else 'cpu'

class FaceClassification(L.LightningModule):
    def __init__(self,num_classes):
        super().__init__()
        self.gray_scale_input = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3, stride=1, padding=1)  
        inception =InceptionResnetV1(pretrained='vggface2', classify=True, num_classes=num_classes)
                # Change the input layer to accept grayscale images
        
       
        self.classifier = inception.to(device)
        
        

    def forward(self,x):
        #print()
        if x.ndim ==3:
            x = x.unsqueeze(1)
            x=self.gray_scale_input(x)
       
        x=self.classifier(x)
       
        return x

# Save the model's state dictionary




  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os
from custom_model.basic_landmark_classifier import LandmarkClassifier
import PIL.Image
import cv2
import dlib
import numpy as np
from imutils import face_utils
from pathlib import Path
import PIL
import pandas as pd
import torch
normalize = transforms.Normalize(mean=[0.5], std=[0.5])

class FRBaseFacenet:
    def __init__(self,classifier_weight_path,id_name_csv_path,image_size=128) :
        names_df=pd.read_csv(id_name_csv_path,index_col=0)
        self.ids_name=names_df[["name","id"]].groupby(["name","id"]).mean().reset_index()
        self.classifier_weight_path=Path(classifier_weight_path)
        self.image_size=image_size
        self.load_pretrained_weight()


    def get_class_name(self,pred_id):
        name=self.ids_name[self.ids_name["id"]==pred_id]["name"].values[-1]
        return name.split("_")[-1]
    def load_pretrained_weight(self):
        # If required, create a face detection pipeline using MTCNN:
        self.mtcnn = MTCNN(image_size=self.image_size,keep_all=True, device=device)
        try:
            self.classifier =FaceClassification(len(self.ids_name))
            self.classifier.load_state_dict(torch.load(str(self.classifier_weight_path)))
            print(f"classifier weight loaded successfully")
        except  Exception as e:
            print(f"classifier loading weight exception occur {e}")
        


    def get_gray_img(self, image):
        # Check if the input is a valid numpy array
        if isinstance(image, np.ndarray):
            # Check if the image is in RGB format
            if image.ndim == 3 and image.shape[2] == 3:
                gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
                return gray_image
            else:
                raise ValueError(f"Expected an RGB image, but got shape: {image.shape}")
        else:
            raise TypeError(f"Expected a numpy array, but got type: {type(image)}") 

    def face_recognize(self, video_path,resize_width=460)->None:
        cam = cv2.VideoCapture(str(video_path))
        cv2.namedWindow("Capture Face")
        self.classifier.eval()
        while True:
            ret, frame = cam.read()
            if not ret:
                break                
            # Resize the frame to the specified width while maintaining aspect ratio
            height, width = frame.shape[:2]
            aspect_ratio = height / width
            new_height = int(resize_width * aspect_ratio)
            frame_resized = cv2.resize(frame, (resize_width, new_height))
            #mtcnn
            boxes,_ = self.mtcnn.detect(frame_resized)
            cropped_faces=[]
            if boxes is not None:
                for box in boxes:
                    x1, y1, x2, y2 = map(int, box)  # Convert to int
                    # Draw rectangle around the detected face
                    
                    face = frame_resized[y1:y2, x1:x2]
                    if face.size == 0:
                        print("zero size face")
                        continue
                    cv2.rectangle(frame_resized, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    face_resized = cv2.resize(face, (self.image_size, self.image_size))
                    
                    face_resized=self.get_gray_img(face_resized)
                    
                    face_resized=torch.tensor(face_resized,dtype=torch.float32,device=device)
                    
                    # Add the channel dimension (1, H, W) for grayscale image
                    if len(face_resized.shape) == 2:  # if it's grayscale (H, W)
                        face_resized = face_resized.unsqueeze(0)  # Convert to (1, H, W) format
                    face_resized/=255.0
                    face_resized=normalize(face_resized)
                    face_resized=face_resized.squeeze(0) 
                    cropped_faces.append(face_resized)
                if len(cropped_faces)>0:

                    cropped_faces=torch.tensor( np.stack(cropped_faces),dtype=torch.float32,device=device)
                    
                    logits = self.classifier(cropped_faces)
                    probs=F.softmax(logits, dim=1)
                    #print(probs)
                    _, indices = torch.max(probs, dim=1)
                    for i,at_prob in enumerate(probs):
                        
                        index=indices[i].item()
                        prob=at_prob[index]
                        #print(f"prob:{prob}")
                        if prob>0.6:
                            text_color = (0, 255, 0)
                            cls_name=self.get_class_name(index)
                            cv2.putText(frame_resized, cls_name, (x1 + 2, y1 - 2),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, text_color,1, cv2.LINE_AA)  # White text, thicker, anti-aliased
                            
                        elif prob<0.3:
                            cls_name="not in database"
                            #text_color=(0, 0, 255)
                            #cv2.putText(frame_resized, cls_name, (x1 + 2, y1 - 2),
                            #        cv2.FONT_HERSHEY_SIMPLEX, 1, text_color,1, cv2.LINE_AA)  # White text, thicker, anti-aliased
                            
                        else:
                            cls_name="recognizing..."
                            text_color = (255, 255, 255)
                            cv2.putText(frame_resized, cls_name, (x1 + 2, y1 - 2),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, text_color,1, cv2.LINE_AA)  # White text, thicker, anti-aliased
                            
                        print(f"cls_name:{cls_name} with prob{prob}")
                else:
                    print("no faces")


                cv2.imshow("Capture Face", frame_resized)
                
                k = cv2.waitKey(1)
                if k % 256 == 27 :  # ESC or 20 images collected
                    break

        cam.release()
        cv2.destroyAllWindows()

 
        
         
        
obj=FRBaseFacenet(


    "./weights/face_classification_model_3.pth",
    "./weights/name_id.csv"
    )



classifier weight loaded successfully


In [3]:
obj.face_recognize("./videos/vid1.mp4",600)

cls_name:sameer with prob0.9996067881584167
cls_name:sameer with prob0.9996215105056763
cls_name:sameer with prob0.999626874923706
cls_name:sameer with prob0.9996069073677063
cls_name:sameer with prob0.9995924830436707
cls_name:sameer with prob0.9996188879013062
cls_name:sameer with prob0.999596893787384
cls_name:sameer with prob0.9995887875556946
cls_name:sameer with prob0.9996086955070496
cls_name:sameer with prob0.9996199607849121
cls_name:sameer with prob0.9996022582054138
cls_name:sameer with prob0.9995922446250916
cls_name:sameer with prob0.9996131062507629
cls_name:sameer with prob0.9996187686920166
cls_name:sameer with prob0.9995974898338318
cls_name:sameer with prob0.9995997548103333
cls_name:sameer with prob0.9996180534362793
cls_name:sameer with prob0.9996089339256287
cls_name:sameer with prob0.9995989203453064
cls_name:sameer with prob0.9996107220649719
cls_name:sameer with prob0.9995982050895691
cls_name:sameer with prob0.9996129870414734
cls_name:sameer with prob0.9995964