In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import os
from PIL import Image
import glob
import random
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report
import time

# Siamese network definition (ResNet-18 backbone)
class SiameseNetwork(nn.Module):
    """Twin-branch embedding network whose branches share one set of weights.

    Each input is passed through the same ResNet-18 trunk (its final ``fc``
    layer replaced by an identity) and then through a linear projection that
    maps the 512 trunk features to a 128-dimensional embedding.
    """

    def __init__(self):
        super().__init__()
        # ResNet-18 created without pretrained weights; replacing ``fc`` with
        # Identity makes the trunk return its features instead of logits.
        trunk = models.resnet18(pretrained=False)
        trunk.fc = nn.Identity()
        self.backbone = trunk
        # Projection head: 512 features -> 128-d embedding.
        self.fc = nn.Linear(512, 128)

    def forward_one(self, x):
        """Embed a single batch of images (one branch of the network)."""
        return self.fc(self.backbone(x))

    def forward(self, input1, input2):
        """Embed both inputs with the shared weights and return both embeddings."""
        return self.forward_one(input1), self.forward_one(input2)

# Contrastive loss
class ContrastiveLoss(nn.Module):
    """Contrastive loss over pairs of embeddings.

    Pairs labelled 1 (same identity) are penalised by their squared distance;
    pairs labelled 0 (different identity) are penalised by the squared amount
    by which their distance falls short of ``margin``.
    """

    def __init__(self, margin=2.0):
        super().__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        """Return ``(loss, distances)`` for a batch of embedding pairs.

        ``loss`` is half of the batch mean of the per-pair penalties;
        ``distances`` is the per-pair Euclidean distance, returned so callers
        can threshold it without recomputing.
        """
        dist = torch.nn.functional.pairwise_distance(output1, output2)
        # How far each pair sits inside the margin (zero once it is outside).
        shortfall = (self.margin - dist).clamp(min=0.0)
        pull_together = label * dist.pow(2)
        push_apart = (1 - label) * shortfall.pow(2)
        per_pair = pull_together + push_apart
        return per_pair.mean() / 2, dist

# Pair dataset for the Siamese network
class SiameseFaceDataset(Dataset):
    """Random-pair dataset over a directory of per-identity image folders.

    ``root_dir`` is scanned for sub-folders; each sub-folder (in sorted order)
    becomes one class and its ``*.jpg`` / ``*.png`` files become that class's
    images. Items are image pairs drawn at random, so the index passed to
    ``__getitem__`` does not select a particular pair.

    Attributes set by the constructor:
        image_paths, labels: parallel lists with one entry per image found.
        label_to_idx: folder name -> class index (empty folders included).
        label_to_images: class index -> list of image paths (may be empty).
        num_classes: number of sub-folders found.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.label_to_idx = {}

        folders = sorted(
            f for f in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, f))
        )
        for idx, folder in enumerate(folders):
            self.label_to_idx[folder] = idx
            image_files = glob.glob(os.path.join(root_dir, folder, "*.jpg")) + \
                glob.glob(os.path.join(root_dir, folder, "*.png"))
            self.image_paths.extend(image_files)
            self.labels.extend([idx] * len(image_files))

        self.num_classes = len(self.label_to_idx)
        self.label_to_images = {i: [] for i in range(self.num_classes)}
        for img_path, label in zip(self.image_paths, self.labels):
            self.label_to_images[label].append(img_path)

        # Only classes that actually contain images may be drawn as the
        # "different" class; an empty folder would make random.choice fail.
        self._sampleable_labels = [
            label for label, paths in self.label_to_images.items() if paths
        ]

    def __len__(self):
        # Twice the image count: pairs are random, so this only sets how many
        # pairs make up one pass over the dataset.
        return len(self.image_paths) * 2

    def _sample_pair_paths(self):
        """Draw ``(path1, path2, label)`` with label 1 = same class, 0 = different.

        Raises:
            ValueError: a different-class pair was drawn but no other class
                with images exists.
        """
        anchor_idx = random.randint(0, len(self.image_paths) - 1)
        anchor_path = self.image_paths[anchor_idx]
        anchor_label = self.labels[anchor_idx]

        if random.randint(0, 1):
            # Prefer a genuinely different image of the same class; pairing an
            # image with itself gives a trivial zero-distance positive.
            siblings = [
                p for p in self.label_to_images[anchor_label] if p != anchor_path
            ]
            other_path = random.choice(siblings) if siblings else anchor_path
            return anchor_path, other_path, 1

        other_labels = [l for l in self._sampleable_labels if l != anchor_label]
        if not other_labels:
            raise ValueError(
                "Cannot build a different-class pair: fewer than two classes "
                f"with images were found under {self.root_dir!r}"
            )
        other_label = random.choice(other_labels)
        return anchor_path, random.choice(self.label_to_images[other_label]), 0

    @staticmethod
    def _load_rgb(path):
        """Open ``path`` and return it as an RGB image, closing the file."""
        with Image.open(path) as img:
            return img.convert('RGB')

    def __getitem__(self, idx):
        """Return ``(img1, img2, label)`` for a randomly drawn pair.

        ``idx`` is ignored. ``label`` is a float32 scalar tensor: 1.0 when
        both images come from the same class, 0.0 otherwise. ``transform``,
        when set, is applied to both images.
        """
        path1, path2, label = self._sample_pair_paths()
        img1 = self._load_rgb(path1)
        img2 = self._load_rgb(path2)

        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        return img1, img2, torch.tensor(label, dtype=torch.float32)

# Training loop for the Siamese network
def train_siamese_model(model, dataloader, criterion, optimizer, num_epochs=5, device='cuda',
                        threshold=1.0, save_path='siamese_face_recognition_model.pth'):
    """Train ``model`` on image pairs and save its state dict.

    Args:
        model: module called as ``model(img1, img2)`` returning two embeddings.
        dataloader: iterable of ``(img1, img2, labels)`` batches.
        criterion: callable returning ``(loss, distances)`` for a batch.
        optimizer: optimizer over ``model``'s parameters.
        num_epochs: number of passes over ``dataloader``.
        device: device the batches are moved to.
        threshold: a pair counts as "same" when its distance is below this
            value; only used for the accuracy that is printed.
        save_path: file the state dict is written to after training.

    Prints the mean batch loss and pair accuracy per epoch. An epoch that
    yields no batches reports 0 for both instead of dividing by zero.
    """
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        for img1, img2, labels in dataloader:
            img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)
            optimizer.zero_grad()
            output1, output2 = model(img1, img2)
            loss, distances = criterion(output1, output2, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
            # Accuracy of the thresholded distance against the pair labels.
            predictions = (distances < threshold).float()
            correct += (predictions == labels).sum().item()
            total += labels.size(0)
        avg_loss = running_loss / num_batches if num_batches else 0.0
        accuracy = 100 * correct / total if total else 0.0
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%')

    torch.save(model.state_dict(), save_path)
    print(f"‚úÖ M√¥ h√¨nh Siamese Network ƒë√£ ƒë∆∞·ª£c l∆∞u v√†o '{save_path}'")

# Evaluation of the Siamese network
def evaluate_siamese_model(model, dataloader, device='cuda', threshold=1.0):
    """Score same/different predictions of ``model`` over ``dataloader``.

    A pair is predicted "same" (1) when the Euclidean distance between its
    two embeddings is below ``threshold``, otherwise "different" (0).

    Args:
        model: module called as ``model(img1, img2)`` returning two embeddings.
        dataloader: iterable of ``(img1, img2, labels)`` batches with 0/1 labels.
        device: device the image batches are moved to.
        threshold: distance below which a pair is predicted "same".

    Returns:
        ``(f1, precision, recall)``, each weighted by class support. A
        per-class report is printed as a side effect.
    """
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for img1, img2, labels in dataloader:
            img1, img2 = img1.to(device), img2.to(device)
            output1, output2 = model(img1, img2)
            # Same distance the contrastive loss uses, computed directly so no
            # loss object has to be built per batch.
            distances = torch.nn.functional.pairwise_distance(output1, output2)
            all_preds.extend((distances < threshold).long().cpu().tolist())
            all_labels.extend(labels.long().cpu().tolist())

    f1 = f1_score(all_labels, all_preds, average='weighted')
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')

    print("Classification Report:")
    # ``labels`` pins both classes; without it the report raises when only
    # one class occurs, because two target names are supplied.
    print(classification_report(all_labels, all_preds, labels=[0, 1],
                                target_names=['Different', 'Same']))

    return f1, precision, recall

# Prediction for a single image
def predict_siamese(model, image, reference_images, transform, label_to_idx, device='cuda', threshold=1.0):
    """Identify ``image`` by its embedding distance to reference images.

    Args:
        model: network exposing ``forward_one`` for single-branch embedding.
        image: query image accepted by ``transform``.
        reference_images: iterable of ``(image_path, class_index)`` pairs.
        transform: callable turning an image into a tensor.
        label_to_idx: mapping of class name -> class index.
        device: device tensors are moved to.
        threshold: a reference only counts as a match below this distance.

    Returns:
        ``(label, distance, seconds)``: the name of the closest reference
        under ``threshold`` and its distance, or ``("Unknown", inf)`` when no
        reference qualifies, plus the elapsed wall-clock time.
    """
    model.eval()
    started = time.time()
    names_by_idx = dict((idx, name) for name, idx in label_to_idx.items())
    best_label = "Unknown"
    best_distance = float('inf')

    with torch.no_grad():
        query_embedding = model.forward_one(transform(image).unsqueeze(0).to(device))

        for path, class_idx in reference_images:
            reference = transform(Image.open(path).convert('RGB'))
            reference_embedding = model.forward_one(reference.unsqueeze(0).to(device))
            gap = torch.nn.functional.pairwise_distance(query_embedding, reference_embedding).item()

            # Keep only references that beat both the threshold and the best so far.
            accepted = gap < threshold and gap < best_distance
            if not accepted:
                continue
            best_distance = gap
            best_label = names_by_idx[class_idx]

    elapsed = time.time() - started
    return best_label, best_distance, elapsed

# Prediction for every image in a folder
def predict_siamese_folder(model, folder_path, reference_images, transform, label_to_idx, device='cuda', threshold=1.0):
    """Run ``predict_siamese`` on every ``*.jpg`` / ``*.png`` in ``folder_path``.

    Prints the prediction, distance and inference time per image, followed by
    a summary (images processed, mean inference time, count per predicted
    label). Images that cannot be processed are reported and skipped. Returns
    ``None``.
    """
    print(f"\nPredicting images in folder: {folder_path}")

    image_files = glob.glob(os.path.join(folder_path, "*.jpg")) + \
                  glob.glob(os.path.join(folder_path, "*.png"))

    if not image_files:
        print("No images found in the folder. Please check the directory.")
        return

    results = []
    total_inference_time = 0.0
    prediction_counts = {}

    for img_path in image_files:
        # Best-effort per file: one unreadable image must not abort the run.
        try:
            img = Image.open(img_path).convert('RGB')
            result, distance, inference_time = predict_siamese(model, img, reference_images, transform, label_to_idx, device, threshold)
        except Exception as e:
            print(f"Error processing {img_path}: {e}")
            continue

        print(f"Image: {os.path.basename(img_path)}")
        print(f"Prediction: {result} (Distance: {distance:.4f})")
        print(f"Inference time: {inference_time:.6f} seconds")
        print("-" * 50)

        results.append((img_path, result, inference_time))
        total_inference_time += inference_time
        prediction_counts[result] = prediction_counts.get(result, 0) + 1

    print("\nSummary:")
    print(f"Total images processed: {len(results)}")
    if results:
        print(f"Average inference time: {total_inference_time / len(results):.6f} seconds")
    else:
        # Every image failed, so there is nothing to average.
        print("Average inference time: n/a (no image could be processed)")
    print("\nPrediction counts:")
    for label, count in prediction_counts.items():
        print(f"{label}: {count} images")

# Setup and training
if __name__ == "__main__":
    # Use the GPU when one is available.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Preprocessing: fixed 224x224 size, tensor conversion, per-channel normalisation.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Pair dataset and loader.
    dataset = SiameseFaceDataset(root_dir=r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces", transform=transform)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    # Model, loss and optimizer.
    model = SiameseNetwork().to(device)
    criterion = ContrastiveLoss(margin=2.0)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train.
    train_siamese_model(model, dataloader, criterion, optimizer, num_epochs=10, device=device)

    # NOTE: these metrics are computed on the training pairs, not on held-out data.
    print("\nƒê√°nh gi√° tr√™n t·∫≠p hu·∫•n luy·ªán:")
    f1, precision, recall = evaluate_siamese_model(model, dataloader, device)
    print(f"F1 Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

    # One randomly chosen reference image per class for prediction.
    reference_images = []
    for label, img_paths in dataset.label_to_images.items():
        # A class folder without images has nothing to pick from;
        # random.choice on an empty list would raise IndexError.
        if img_paths:
            reference_images.append((random.choice(img_paths), label))

    # Predict every image in the test folder.
    test_folder = r"C:\Users\Divu\Desktop\DADN\detect_face\test_faces"
    predict_siamese_folder(model, test_folder, reference_images, transform, dataset.label_to_idx, device, threshold=1.0)



Epoch 1/10, Loss: 0.2567, Accuracy: 89.23%
Epoch 2/10, Loss: 0.0375, Accuracy: 98.87%
Epoch 3/10, Loss: 0.0292, Accuracy: 99.00%
Epoch 4/10, Loss: 0.0186, Accuracy: 99.49%
Epoch 5/10, Loss: 0.0110, Accuracy: 99.62%
Epoch 6/10, Loss: 0.0144, Accuracy: 99.59%
Epoch 7/10, Loss: 0.0098, Accuracy: 99.74%
Epoch 8/10, Loss: 0.0062, Accuracy: 99.92%
Epoch 9/10, Loss: 0.0059, Accuracy: 99.90%
Epoch 10/10, Loss: 0.0063, Accuracy: 99.90%
✅ Mô hình Siamese Network đã được lưu vào 'siamese_face_recognition_model.pth'

Đánh giá trên tập huấn luyện:
Classification Report:
              precision    recall  f1-score   support

   Different       1.00      1.00      1.00      1946
        Same       1.00      1.00      1.00      1952

    accuracy                           1.00      3898
   macro avg       1.00      1.00      1.00      3898
weighted avg       1.00      1.00      1.00      3898

F1 Score: 1.0000, Precision: 1.0000, Recall: 1.0000

Predicting images in folder: C:\Us

In [1]:
import os
import cv2
import time
from datetime import datetime
import face_recognition
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from torchvision import transforms
import glob
import random

# Simple CNN backbone
class SimpleCNN(nn.Module):
    """Small three-stage convolutional encoder producing a 128-d vector.

    Every stage is Conv(3x3, padding 1) -> ReLU -> MaxPool(2), so the spatial
    size halves per stage while the channels go 3 -> 32 -> 64 -> 128. The
    linear head is sized for the 128x28x28 map a 224x224 input produces.
    """

    def __init__(self):
        super().__init__()
        stages = []
        for in_ch, out_ch in ((3, 32), (32, 64), (64, 128)):
            stages += [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]
        # Kept as one flat Sequential so layer indices (and therefore
        # state-dict keys) stay features.0 ... features.8.
        self.features = nn.Sequential(*stages)
        # Flattened 128*28*28 feature map -> 128-d embedding.
        self.fc = nn.Linear(128 * 28 * 28, 128)

    def forward(self, x):
        """Encode a batch of images into 128-d vectors."""
        feature_map = self.features(x)
        flat = feature_map.view(feature_map.size(0), -1)
        return self.fc(flat)

# Siamese network definition (SimpleCNN backbone)
class SiameseNetwork(nn.Module):
    """Siamese wrapper that embeds both inputs with one shared ``SimpleCNN``."""

    def __init__(self):
        super().__init__()
        self.backbone = SimpleCNN()

    def forward_one(self, x):
        """Embed a single batch of images (one branch of the network)."""
        embedding = self.backbone(x)
        return embedding

    def forward(self, input1, input2):
        """Embed both inputs with the shared backbone and return both results."""
        return self.forward_one(input1), self.forward_one(input2)

# Prediction with the Siamese network
def predict(model, image, reference_images, transform, label_to_idx, device='cuda', threshold=1.0):
    """Identify ``image`` by its embedding distance to reference images.

    Args:
        model: network exposing ``forward_one`` for single-branch embedding.
        image: query image accepted by ``transform``.
        reference_images: iterable of ``(image_path, class_index)`` pairs.
        transform: callable turning an image into a tensor.
        label_to_idx: mapping of class name -> class index.
        device: device tensors are moved to.
        threshold: a reference only counts as a match below this distance.

    Returns:
        ``(label, seconds)``: the name of the closest reference under
        ``threshold`` (``"Unknown"`` when none qualifies) and the elapsed
        wall-clock time.
    """
    model.eval()
    started = time.time()
    names_by_idx = dict((idx, name) for name, idx in label_to_idx.items())
    best_label = "Unknown"
    best_distance = float('inf')

    with torch.no_grad():
        query_embedding = model.forward_one(transform(image).unsqueeze(0).to(device))

        for path, class_idx in reference_images:
            reference = transform(Image.open(path).convert('RGB'))
            reference_embedding = model.forward_one(reference.unsqueeze(0).to(device))
            gap = torch.nn.functional.pairwise_distance(query_embedding, reference_embedding).item()

            # Keep only references that beat both the threshold and the best so far.
            accepted = gap < threshold and gap < best_distance
            if not accepted:
                continue
            best_distance = gap
            best_label = names_by_idx[class_idx]

    elapsed = time.time() - started
    return best_label, elapsed

class FaceRecognitionSystem:
    """Webcam face capture and recognition built on a Siamese embedding model.

    On construction the dataset directory is scanned: every sub-folder (in
    sorted order) becomes a label, and one randomly chosen ``*.jpg``/``*.png``
    from it becomes that label's reference image. Faces found in webcam frames
    are then labelled by ``predict`` against those references.
    """

    def __init__(self, dataset_path=r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces", detection_method="hog", model_path="siamese_face_recognition_simple_cnn.pth"):
        self.dataset_path = dataset_path
        self.detection_method = detection_method
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Preprocessing applied to query and reference images alike.
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        # Labels and one reference image per label.
        self.label_to_idx = {}
        self.reference_images = []
        folders = [f for f in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, f))]
        for idx, folder in enumerate(sorted(folders)):
            self.label_to_idx[folder] = idx
            image_files = glob.glob(os.path.join(dataset_path, folder, "*.jpg")) + \
                         glob.glob(os.path.join(dataset_path, folder, "*.png"))
            # Folders without images keep their label but get no reference.
            if image_files:
                self.reference_images.append((random.choice(image_files), idx))

        self.num_classes = len(self.label_to_idx)

        self.model = SiameseNetwork().to(self.device)

        # Best-effort load: on failure the error is reported and the instance
        # keeps the freshly initialised (untrained) weights.
        # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
        try:
            self.model.load_state_dict(torch.load(model_path, map_location=self.device))
            self.model.eval()
            print(f"‚úÖ M√¥ h√¨nh Siamese Network ƒë√£ ƒë∆∞·ª£c t·∫£i t·ª´ {model_path}")
        except Exception as e:
            print(f"‚ùå L·ªói khi t·∫£i m√¥ h√¨nh: {e}")
            print("Vui l√≤ng hu·∫•n luy·ªán l·∫°i m√¥ h√¨nh Siamese Network v·ªõi dataset hi·ªán t·∫°i.")

    @staticmethod
    def _clamp_box(frame_shape, top, right, bottom, left, padding=0):
        """Grow a face box by ``padding`` and clip it to the frame.

        ``frame_shape`` is ``(height, width, ...)``. Returns the box in the
        same ``(top, right, bottom, left)`` order it was given.
        """
        height, width = frame_shape[0], frame_shape[1]
        return (
            max(0, top - padding),
            min(width, right + padding),
            min(height, bottom + padding),
            max(0, left - padding),
        )

    def _identify(self, face_bgr):
        """Return the predicted label for a BGR face crop ("Unknown" if empty)."""
        if face_bgr.size == 0:
            return "Unknown"
        face_rgb = cv2.cvtColor(face_bgr, cv2.COLOR_BGR2RGB)
        # NOTE(review): predict re-reads and re-embeds every reference image
        # on each call; caching reference embeddings would avoid that.
        label, _ = predict(self.model, Image.fromarray(face_rgb), self.reference_images, self.transform, self.label_to_idx, self.device)
        return label

    def draw_rectangles(self, frame, top, right, bottom, left, label="Face"):
        """Draw a green box and ``label`` on ``frame`` in place and return it."""
        top, right, bottom, left = self._clamp_box(frame.shape, top, right, bottom, left)
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
        # Keep the text baseline inside the frame for faces near the top edge.
        label_y = max(top - 10, 15)
        cv2.putText(frame, label, (left, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        return frame

    def capture_images(self, num_images=10, person_name=None):
        """Save ``num_images`` face crops from the webcam into the dataset.

        Crops go to ``<dataset_path>/<person_name>``, or to a timestamped
        ``person_...`` folder when no name is given. One frame is processed
        per second; pressing ``q`` stops early. The webcam and preview window
        are released even if processing raises.
        """
        if person_name:
            output_dir = os.path.join(self.dataset_path, person_name)
        else:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_dir = os.path.join(self.dataset_path, f"person_{timestamp}")

        os.makedirs(output_dir, exist_ok=True)
        print(f"üìÅ L∆∞u ·∫£nh khu√¥n m·∫∑t v√†o th∆∞ m·ª•c: {output_dir}")

        video = cv2.VideoCapture(0)
        if not video.isOpened():
            print("‚ùå Kh√¥ng th·ªÉ m·ªü webcam")
            return

        print(f"üöÄ B·∫Øt ƒë·∫ßu ch·ª•p {num_images} ·∫£nh khu√¥n m·∫∑t...")
        count = 0
        try:
            while count < num_images:
                ret, frame = video.read()
                if not ret:
                    print("‚ùå Kh√¥ng th·ªÉ l·∫•y khung h√¨nh t·ª´ webcam")
                    break

                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                face_locations = face_recognition.face_locations(rgb_frame, model=self.detection_method)

                face_img = None
                if face_locations:
                    # Only the first detected face is captured.
                    top, right, bottom, left = face_locations[0]
                    crop_top, crop_right, crop_bottom, crop_left = self._clamp_box(
                        frame.shape, top, right, bottom, left)
                    # Copy the crop: a plain slice is a view of ``frame`` and
                    # would pick up the box/label drawn below before saving.
                    face_img = frame[crop_top:crop_bottom, crop_left:crop_right].copy()

                if face_img is not None and face_img.size:
                    result = self._identify(face_img)
                    frame = self.draw_rectangles(frame, top, right, bottom, left, label=result)

                    filename = f"face_{count:05d}.png"
                    filepath = os.path.join(output_dir, filename)
                    cv2.imwrite(filepath, face_img)
                    print(f"üì∏ ƒê√£ l∆∞u khu√¥n m·∫∑t th·ª© {count + 1}/{num_images}: {filepath}")
                    count += 1
                else:
                    print(f"‚ö†Ô∏è Kh√¥ng ph√°t hi·ªán khu√¥n m·∫∑t trong khung h√¨nh th·ª© {count + 1}")

                cv2.imshow("Face Detection", frame)
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    print("üõë Ng∆∞·ªùi d√πng ƒë√£ tho√°t")
                    break
                # Pace the capture to roughly one frame per second.
                time.sleep(1)
        finally:
            video.release()
            cv2.destroyAllWindows()

        print(f"‚úÖ Ho√†n t·∫•t! ƒê√£ l∆∞u {count} ·∫£nh khu√¥n m·∫∑t v√†o {output_dir}")

    def recognize_faces(self):
        """Label every face in the webcam stream until ``q`` is pressed.

        The webcam and preview window are released even if processing raises.
        """
        video = cv2.VideoCapture(0)
        if not video.isOpened():
            print("‚ùå Kh√¥ng th·ªÉ m·ªü webcam")
            return

        print("üöÄ B·∫Øt ƒë·∫ßu nh·∫≠n di·ªán khu√¥n m·∫∑t t·ª´ webcam...")
        try:
            while True:
                ret, frame = video.read()
                if not ret:
                    print("‚ùå Kh√¥ng th·ªÉ l·∫•y khung h√¨nh t·ª´ webcam")
                    break

                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                face_locations = face_recognition.face_locations(rgb_frame, model=self.detection_method)

                # Draw on a copy so crops of later faces come from untouched pixels.
                annotated = frame.copy()
                for top, right, bottom, left in face_locations:
                    crop_top, crop_right, crop_bottom, crop_left = self._clamp_box(
                        frame.shape, top, right, bottom, left)
                    face_img = frame[crop_top:crop_bottom, crop_left:crop_right]
                    result = self._identify(face_img)
                    annotated = self.draw_rectangles(annotated, top, right, bottom, left, label=result)

                cv2.imshow("Face Recognition", annotated)
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    print("üõë Ng∆∞·ªùi d√πng ƒë√£ tho√°t")
                    break
        finally:
            video.release()
            cv2.destroyAllWindows()

if __name__ == "__main__":
    # Build the system: scans the dataset folder and tries to load the weights.
    frs = FaceRecognitionSystem(
        dataset_path=r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces",
        model_path=r"siamese_face_recognition_simple_cnn.pth",
    )
    # "1" captures new face images, "2" runs live recognition; anything else exits.
    mode = input("Ch·ªçn ch·∫ø ƒë·ªô (1: Ch·ª•p ·∫£nh, 2: Nh·∫≠n di·ªán): ").strip()
    if mode == "2":
        frs.recognize_faces()
    elif mode == "1":
        person_name = input("Nh·∫≠p t√™n ng∆∞·ªùi (ho·∫∑c ƒë·ªÉ tr·ªëng ƒë·ªÉ d√πng timestamp): ").strip()
        # An empty name is passed as None so a timestamped folder is used.
        frs.capture_images(num_images=10, person_name=person_name or None)

❌ Lỗi khi tải mô hình: [Errno 2] No such file or directory: 'siamese_face_recognition_simple_cnn.pth'
Vui lòng huấn luyện lại mô hình Siamese Network với dataset hiện tại.
🚀 Bắt đầu nhận diện khuôn mặt từ webcam...
🛑 Người dùng đã thoát
