In [64]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import math
import torch
import torchvision.transforms as transforms
from facenet_pytorch import MTCNN,InceptionResnetV1
from PIL import Image
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import joblib
from sklearn.svm import SVC
import pandas as pd


In [55]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [56]:
class FaceLoading:
    def __init__(self,directory):
        self.directory = directory
        self.target_size = (112, 112)
        self.X = []
        self.y = []
        self.mtcnn = MTCNN(image_size=160, margin=0, min_face_size=20,
            thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True)
        self.i = 1
    def extract_face(self,path):
        img = cv2.imread(path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        face = self.mtcnn(img)
        return face
    def load_face_and_class(self):
        for sub_dir in os.listdir(self.directory):
            sub_dir_path = os.path.join(self.directory, sub_dir)
            for img_name in os.listdir(sub_dir_path):
                face = self.extract_face(os.path.join(sub_dir_path, img_name))
                if(face is not None):
                    self.X.append(face)
                    self.y.append(sub_dir)
        return np.array(self.X), np.array(self.y)
    def plot_images(self):
        num_columns = 3
        num_rows = math.ceil(len(self.X) / num_columns)  # Ensure row count covers all images

        plt.figure(figsize=(num_columns * 3, num_rows * 3))  # Adjust figure size dynamically
        for num, img in enumerate(self.X):
            plt.subplot(num_rows, num_columns, num + 1)
            plt.imshow(img.permute(1, 2, 0).numpy())
            plt.axis('off')
        plt.tight_layout()  # Improve layout spacing
        plt.show()

In [57]:
face_loading = FaceLoading(r"C:\Users\admin\OneDrive - Hanoi University of Science and Technology\Documents\GitHub\PTTK\face_recognization\source\data_raw\image")
X,y = face_loading.load_face_and_class()

In [58]:
encode = LabelEncoder()
encoded_y = encode.fit_transform(y)
encoded_y = encoded_y.reshape(-1, 1)
onehot_encoder = OneHotEncoder(sparse_output=False)  # sparse_output=False để trả về mảng NumPy
onehot_y = onehot_encoder.fit_transform(encoded_y)
print(onehot_y)

[[1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1.

In [59]:
# face_loading.plot_images()

In [60]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

In [63]:

# Assuming resnet is your InceptionResnetV1 model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
resnet = resnet.to(device)
resnet.eval()  # Set model to evaluation mode

# Preprocessing pipeline
preprocess = transforms.Compose([
    transforms.Resize((160, 160)),  # Resize to 160x160
    transforms.ToTensor(),          # Converts to (C, H, W) and normalizes to [0, 1]
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize to [-1, 1]
])

X_embed = []
for face in X:
    if isinstance(face, np.ndarray):
        # Check shape and type
        print(f"Shape: {face.shape}, Dtype: {face.dtype}")

        # Ensure face is a valid image array
        if face.ndim == 3:
            if face.shape[-1] in [1, 3]:  # (H, W, C) format
                if face.shape[-1] == 1:  # Convert grayscale to RGB
                    face = np.repeat(face, 3, axis=-1)
            elif face.shape[0] in [1, 3]:  # (C, H, W) format
                face = face.transpose(1, 2, 0)  # Convert to (H, W, C)
                if face.shape[-1] == 1:  # Convert grayscale to RGB
                    face = np.repeat(face, 3, axis=-1)
            else:
                raise ValueError(f"Invalid face shape: {face.shape}. Expected (H, W, 3), (H, W, 1), or (3, H, W).")
        elif face.ndim == 2:  # Grayscale (H, W)
            face = np.stack([face] * 3, axis=-1)  # Convert to RGB
        else:
            raise ValueError(f"Invalid face shape: {face.shape}. Expected (H, W, 3), (H, W, 1), or (H, W).")

        # Ensure uint8 for PIL (if float, convert to uint8)
        if face.dtype != np.uint8:
            face = (face * 255).clip(0, 255).astype(np.uint8)

        face_pil = Image.fromarray(face)

        # Apply preprocessing
        face_tensor = preprocess(face_pil).unsqueeze(0).to(device)  # Shape: [1, 3, 160, 160]

        # Get embedding
        with torch.no_grad():
            embedding = resnet(face_tensor).cpu().detach().numpy()
        X_embed.append(embedding)
    else:
        raise ValueError("Each face in X should be a NumPy array")

X_embed = np.array(X_embed)

Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
Shape: (3, 160, 160), Dtype: float32
S

In [69]:
def save_embeddings_and_model(X_embed, y, svm_model, label_encoder,
                             embed_path='embeddings.npy',
                             label_path='labels.npy',
                             csv_path='embeddings_labels.csv',
                             model_path='svm_model.joblib',
                             encoder_path='label_encoder.joblib'):
    """Lưu embedding, nhãn, mô hình SVM và LabelEncoder."""
    # Lưu embedding và nhãn vào file .npy
    np.save(embed_path, X_embed)
    np.save(label_path, y)
    print(f"Saved embeddings to {embed_path}")
    print(f"Saved labels to {label_path}")

    # Lưu vào file CSV
    df = pd.DataFrame(X_embed)
    df['label'] = y
    df.to_csv(csv_path, index=False)
    print(f"Saved embeddings and labels to {csv_path}")

    # Lưu mô hình SVM
    joblib.dump(svm_model, model_path)
    print(f"Saved SVM model to {model_path}")

    # Lưu LabelEncoder
    joblib.dump(label_encoder, encoder_path)
    print(f"Saved LabelEncoder to {encoder_path}")

In [70]:
if X_embed.ndim == 3:
    X_embed = X_embed.squeeze(1)  # Chuyển từ (N, 1, 512) thành (N, 512)


label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

print(f"X_embed shape: {X_embed.shape}")
print(f"y shape: {y.shape}")

X_train, X_test, y_train, y_test = train_test_split(
    X_embed, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

svm_model = SVC(kernel='rbf', C=1.0, probability=True, random_state=42)
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy on test set: {accuracy:.4f}")
save_embeddings_and_model(
    X_embed,
    y,
    svm_model,
    label_encoder,
    embed_path='embeddings.npy',
    label_path='labels.npy',
    csv_path='embeddings_labels.csv',
    model_path='svm_model.joblib',
    encoder_path='label_encoder.joblib'
)

X_embed shape: (104, 512)
y shape: (104,)
Accuracy on test set: 1.0000
Saved embeddings to embeddings.npy
Saved labels to labels.npy
Saved embeddings and labels to embeddings_labels.csv
Saved SVM model to svm_model.joblib
Saved LabelEncoder to label_encoder.joblib
