In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import os
from PIL import Image
import glob
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report

# Dataset definition
class FaceRecognitionDataset(Dataset):
    """Image-folder dataset with one sub-directory of ``root_dir`` per person.

    Sub-directories are sorted by name and numbered from 0; that number is
    the class label of every image found directly inside the directory.

    Args:
        root_dir: Directory whose immediate sub-directories each hold the
            images of one person.
        transform: Optional callable applied to the RGB ``PIL.Image`` before
            it is returned by ``__getitem__``.
        extensions: File-name suffixes treated as images. Matching is
            case-insensitive so the result does not depend on the platform.
    """

    def __init__(self, root_dir, transform=None, extensions=(".jpg", ".png")):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []   # file path of every sample
        self.labels = []        # class index of every sample, parallel to image_paths
        self.label_to_idx = {}  # folder name -> class index

        if isinstance(extensions, str):
            extensions = (extensions,)
        suffixes = tuple(ext.lower() for ext in extensions)

        folders = sorted(
            name for name in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, name))
        )
        for idx, folder in enumerate(folders):
            self.label_to_idx[folder] = idx
            folder_path = os.path.join(root_dir, folder)
            # A sorted listing keeps the sample order reproducible; raw
            # directory listing order is arbitrary.
            for file_name in sorted(os.listdir(folder_path)):
                if file_name.startswith("."):
                    continue  # skip hidden files
                if not file_name.lower().endswith(suffixes):
                    continue
                file_path = os.path.join(folder_path, file_name)
                if os.path.isfile(file_path):
                    self.image_paths.append(file_path)
                    self.labels.append(idx)

        self.num_classes = len(self.label_to_idx)

    def __len__(self):
        """Return the number of images found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is loaded as RGB and passed through ``transform`` when one
        was given; the label is the integer class index.
        """
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        # The context manager closes the file handle as soon as convert()
        # has produced the in-memory RGB copy.
        with Image.open(img_path) as source:
            img = source.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, label

# Model training function
def train_model(model, dataloader, criterion, optimizer, num_epochs=5, device='cuda',
                save_path='face_recognition_model.pth'):
    """Train ``model`` in place and save its ``state_dict`` to ``save_path``.

    After every epoch the mean per-batch loss and the training accuracy are
    printed.

    Args:
        model: Module to optimise; it is switched to training mode.
        dataloader: Iterable of ``(inputs, labels)`` tensor batches. It is
            iterated once per epoch and does not need to support ``len()``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        num_epochs: Number of passes over ``dataloader``.
        device: Device the batches are moved to; the model is expected to
            be on that device already.
        save_path: File the trained ``state_dict`` is written to.

    Raises:
        ValueError: If an epoch yields no samples.
    """
    # NOTE(review): FaceRecognitionSystem in this file loads
    # "face_recognition_model_res_net.pth" by default, which is not the
    # default save_path used here; pass save_path (or rename the file) so
    # that the two sides agree.
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        if total == 0:
            # Fail with a clear message instead of dividing by zero below.
            raise ValueError("dataloader produced no samples; nothing to train on")
        accuracy = 100 * correct / total
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/num_batches:.4f}, Accuracy: {accuracy:.2f}%')

    # Persist only the weights; the loader has to rebuild the architecture.
    torch.save(model.state_dict(), save_path)
    print(f"‚úÖ M√¥ h√¨nh ResNet18 ƒë√£ ƒë∆∞·ª£c l∆∞u v√†o '{save_path}'")

# Model evaluation function
def evaluate_model(model, dataloader, device='cuda'):
    """Evaluate ``model`` on ``dataloader`` and print a per-class report.

    Args:
        model: Classifier returning one score per class; it is switched to
            evaluation mode.
        dataloader: Loader of ``(inputs, labels)`` batches. Its dataset (or a
            dataset it wraps through a ``.dataset`` attribute, as
            ``torch.utils.data.Subset`` does) must expose ``label_to_idx``
            mapping class names to the indices ``0..n-1``.
        device: Device the inputs are moved to.

    Returns:
        Tuple ``(f1, precision, recall)``, each support-weighted over classes.
    """
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            outputs = model(inputs.to(device))
            all_preds.extend(outputs.argmax(dim=1).cpu().tolist())
            all_labels.extend(labels.tolist())

    # zero_division=0 keeps the usual "score 0" result for classes that were
    # never predicted, without emitting a warning for each of them.
    f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)
    precision = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
    recall = recall_score(all_labels, all_preds, average='weighted', zero_division=0)

    # Unwrap dataset wrappers until the object carrying the label map is found.
    dataset = dataloader.dataset
    while not hasattr(dataset, 'label_to_idx') and hasattr(dataset, 'dataset'):
        dataset = dataset.dataset
    label_to_idx = dataset.label_to_idx

    print("Classification Report:")
    idx_to_label = {v: k for k, v in label_to_idx.items()}
    class_indices = list(range(len(label_to_idx)))
    target_names = [idx_to_label[i] for i in class_indices]
    # Passing labels= pins the report to every known class. Without it the
    # report is built from the classes seen in the data and raises when that
    # count differs from len(target_names), e.g. for a person with no sample.
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_indices,
        target_names=target_names,
        zero_division=0,
    ))

    return f1, precision, recall

# Setup and training
if __name__ == "__main__":
    # Use the GPU when one is available.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Each directory holds one sub-folder per person.
    # NOTE(review): test_dir points at the training images, so the "test"
    # scores printed at the end are NOT a held-out estimate. Point test_dir at
    # a separate directory with the same person folders to get a real one.
    train_dir = r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces"
    test_dir = train_dir

    # Preprocessing shared by training and evaluation.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # standard ResNet input size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet statistics
    ])
    train_dataset = FaceRecognitionDataset(root_dir=train_dir, transform=transform)
    train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    # ResNet18 trained from scratch. Calling resnet18() without arguments
    # gives random initial weights and avoids the deprecated `pretrained`
    # keyword.
    model = models.resnet18()
    model.fc = nn.Linear(model.fc.in_features, train_dataset.num_classes)  # one output per person
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    train_model(model, train_dataloader, criterion, optimizer, num_epochs=5, device=device)

    # Scores on the training images.
    print("\nƒê√°nh gi√° tr√™n t·∫≠p hu·∫•n luy·ªán:")
    f1, precision, recall = evaluate_model(model, train_dataloader, device)
    print(f"F1 Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

    # Scores on the test images.
    if os.path.abspath(test_dir) == os.path.abspath(train_dir):
        print("WARNING: test_dir is the training directory; "
              "the test scores below are not a held-out estimate.")
        test_dataset = train_dataset  # same files, no need to scan the folder again
    else:
        test_dataset = FaceRecognitionDataset(root_dir=test_dir, transform=transform)
    test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    print("\nƒê√°nh gi√° tr√™n t·∫≠p test:")
    f1, precision, recall = evaluate_model(model, test_dataloader, device)
    print(f"F1 Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

In [None]:
import os
import cv2
import time
from datetime import datetime
import face_recognition
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from torchvision import transforms, models

def predict(model, image, transform, label_to_idx, device='cuda', threshold=0.5):
    """Classify a single image and report how long the call took.

    Args:
        model: Classifier returning one score per class; it is put into
            evaluation mode.
        image: Input accepted by ``transform``.
        transform: Callable turning ``image`` into an unbatched tensor.
        label_to_idx: Mapping from class name to class index.
        device: Device the input tensor is moved to.
        threshold: Minimum softmax probability required to report a name.

    Returns:
        ``(result, inference_time)``: ``result`` is ``"Unknown"`` when the top
        probability is below ``threshold``, otherwise the class name followed
        by the probability; ``inference_time`` is the elapsed wall-clock time
        in seconds.
    """
    model.eval()
    started = time.time()

    with torch.no_grad():
        # Add the batch dimension the model expects.
        batch = transform(image).unsqueeze(0).to(device)
        scores = torch.softmax(model(batch), dim=1)
        top_prob, top_class = scores.max(dim=1)
        confidence = top_prob.item()

        if confidence < threshold:
            result = "Unknown"
        else:
            # Invert the mapping to go from class index back to name.
            names = dict(zip(label_to_idx.values(), label_to_idx.keys()))
            result = f"{names[top_class.item()]} (Prob: {confidence:.4f})"

    inference_time = time.time() - started
    return result, inference_time

class FaceRecognitionSystem:
    """Webcam face capture and recognition backed by a ResNet18 classifier.

    Class names are the sorted sub-directory names of ``dataset_path``. The
    classifier weights are read from ``model_path``; when that fails the
    object stays usable for capturing images, and faces are then labelled
    with the generic caption "Face" instead of a prediction.

    Relies on the module-level ``predict`` function for classification.
    """

    _CROP_PADDING = 30  # pixels added around a detection before cropping
    _BOX_PADDING = 20   # pixels added around a detection when drawing it

    def __init__(self, dataset_path=r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces", detection_method="hog", model_path="face_recognition_model_res_net.pth"):
        """Build the label map and the network, then try to load the weights.

        Args:
            dataset_path: Directory with one sub-directory per known person.
            detection_method: Value passed as ``model=`` to
                ``face_recognition.face_locations``.
            model_path: ``state_dict`` file for the ResNet18 classifier.
        """
        self.dataset_path = dataset_path
        self.detection_method = detection_method
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Preprocessing applied to every face crop before classification.
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),  # standard ResNet input size
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet statistics
        ])

        # Label map: sorted folder name -> class index.
        folders = sorted(
            f for f in os.listdir(dataset_path)
            if os.path.isdir(os.path.join(dataset_path, f))
        )
        self.label_to_idx = {folder: idx for idx, folder in enumerate(folders)}
        self.num_classes = len(self.label_to_idx)

        # Calling resnet18() without arguments gives random initial weights
        # and avoids the deprecated `pretrained` keyword.
        self.model = models.resnet18()
        self.model.fc = nn.Linear(self.model.fc.in_features, self.num_classes)  # one output per person
        self.model = self.model.to(self.device)

        # True only once trained weights are in place; predictions from a
        # randomly initialised network must never be shown as names.
        self.model_loaded = False
        try:
            # NOTE(review): torch.load unpickles the file, so only load
            # checkpoints from a trusted source.
            self.model.load_state_dict(torch.load(model_path, map_location=self.device))
        except Exception as e:
            # Deliberately best-effort: a missing file or a class-count
            # mismatch (e.g. after adding a person) must not block capturing.
            print(f"‚ùå L·ªói khi t·∫£i m√¥ h√¨nh: {e}")
            print("Vui l√≤ng hu·∫•n luy·ªán l·∫°i m√¥ h√¨nh ResNet18 v·ªõi dataset hi·ªán t·∫°i.")
        else:
            self.model.eval()
            self.model_loaded = True
            print(f"‚úÖ M√¥ h√¨nh ResNet18 ƒë√£ ƒë∆∞·ª£c t·∫£i t·ª´ {model_path}")

    @staticmethod
    def _pad_box(box, frame_shape, padding):
        """Grow a ``(top, right, bottom, left)`` box by ``padding``, clamped to the frame."""
        top, right, bottom, left = box
        height, width = frame_shape[:2]
        return (
            max(0, top - padding),
            min(width, right + padding),
            min(height, bottom + padding),
            max(0, left - padding),
        )

    @staticmethod
    def _next_free_index(output_dir, start=0):
        """Return the first index >= ``start`` whose ``face_NNNNN.png`` does not exist yet."""
        index = start
        while os.path.exists(os.path.join(output_dir, f"face_{index:05d}.png")):
            index += 1
        return index

    def _label_for(self, face_rgb):
        """Return the caption for an RGB face crop ("Face" when no weights are loaded)."""
        if not self.model_loaded:
            return "Face"
        result, _ = predict(self.model, Image.fromarray(face_rgb), self.transform, self.label_to_idx, self.device)
        return result

    def draw_rectangles(self, frame, top, right, bottom, left, label="Face"):
        """Draw a padded green box with ``label`` on ``frame`` and return the frame.

        The frame is modified in place.
        """
        top, right, bottom, left = self._pad_box((top, right, bottom, left), frame.shape, self._BOX_PADDING)
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
        # Keep the caption visible: when the box touches the top edge there
        # is no room above it, so the text goes just inside the box instead.
        text_y = top - 10 if top >= 25 else top + 20
        cv2.putText(frame, label, (left, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        return frame

    def capture_images(self, num_images=10, person_name=None):
        """Save ``num_images`` face crops from the webcam, about one per second.

        Crops go to ``<dataset_path>/<person_name>``, or to a timestamped
        ``person_...`` folder when no name is given. Files already present in
        that folder are kept; new crops take the next free ``face_NNNNN.png``
        name. Pressing "q" in the preview window stops early.
        """
        if person_name:
            output_dir = os.path.join(self.dataset_path, person_name)
        else:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_dir = os.path.join(self.dataset_path, f"person_{timestamp}")

        os.makedirs(output_dir, exist_ok=True)
        print(f"üìÅ L∆∞u ·∫£nh khu√¥n m·∫∑t v√†o th∆∞ m·ª•c: {output_dir}")

        video = cv2.VideoCapture(0)
        count = 0
        next_index = 0
        try:
            if not video.isOpened():
                print("‚ùå Kh√¥ng th·ªÉ m·ªü webcam")
                return

            print(f"üöÄ B·∫Øt ƒë·∫ßu ch·ª•p {num_images} ·∫£nh khu√¥n m·∫∑t...")
            while count < num_images:
                ret, frame = video.read()
                if not ret:
                    print("‚ùå Kh√¥ng th·ªÉ l·∫•y khung h√¨nh t·ª´ webcam")
                    break

                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                face_locations = face_recognition.face_locations(rgb_frame, model=self.detection_method)

                if face_locations:
                    box = face_locations[0]  # only the first detected face is saved
                    top, right, bottom, left = self._pad_box(box, frame.shape, self._CROP_PADDING)
                    # Copy the crops before drawing so the overlay never ends
                    # up in the saved file or in the classifier input.
                    face_bgr = frame[top:bottom, left:right].copy()
                    face_rgb = rgb_frame[top:bottom, left:right].copy()

                    frame = self.draw_rectangles(frame, *box, label=self._label_for(face_rgb))

                    # Skip names that already exist so earlier captures of
                    # the same person are not overwritten.
                    next_index = self._next_free_index(output_dir, next_index)
                    filepath = os.path.join(output_dir, f"face_{next_index:05d}.png")
                    if not cv2.imwrite(filepath, face_bgr):
                        print(f"Could not write {filepath}; stopping capture.")
                        break
                    print(f"üì∏ ƒê√£ l∆∞u khu√¥n m·∫∑t th·ª© {count + 1}/{num_images}: {filepath}")
                    count += 1
                    next_index += 1
                else:
                    print(f"‚ö†Ô∏è Kh√¥ng ph√°t hi·ªán khu√¥n m·∫∑t trong khung h√¨nh th·ª© {count + 1}")

                cv2.imshow("Face Detection", frame)
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    print("üõë Ng∆∞·ªùi d√πng ƒë√£ tho√°t")
                    break
                time.sleep(1)
        finally:
            # Free the camera and close the preview even if something failed.
            video.release()
            cv2.destroyAllWindows()

        print(f"‚úÖ Ho√†n t·∫•t! ƒê√£ l∆∞u {count} ·∫£nh khu√¥n m·∫∑t v√†o {output_dir}")

    def recognize_faces(self):
        """Show the webcam stream with every detected face boxed and labelled.

        Runs until "q" is pressed in the preview window or a frame cannot be
        read.
        """
        video = cv2.VideoCapture(0)
        try:
            if not video.isOpened():
                print("‚ùå Kh√¥ng th·ªÉ m·ªü webcam")
                return

            print("üöÄ B·∫Øt ƒë·∫ßu nh·∫≠n di·ªán khu√¥n m·∫∑t t·ª´ webcam...")
            while True:
                ret, frame = video.read()
                if not ret:
                    print("‚ùå Kh√¥ng th·ªÉ l·∫•y khung h√¨nh t·ª´ webcam")
                    break

                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                face_locations = face_recognition.face_locations(rgb_frame, model=self.detection_method)

                for box in face_locations:
                    top, right, bottom, left = self._pad_box(box, frame.shape, self._CROP_PADDING)
                    # Crop from rgb_frame, which is never drawn on, so the box
                    # of one face cannot leak into a neighbouring face's crop.
                    label = self._label_for(rgb_frame[top:bottom, left:right].copy())
                    frame = self.draw_rectangles(frame, *box, label=label)

                cv2.imshow("Face Recognition", frame)
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    print("üõë Ng∆∞·ªùi d√πng ƒë√£ tho√°t")
                    break
        finally:
            # Free the camera and close the preview even if something failed.
            video.release()
            cv2.destroyAllWindows()

if __name__ == "__main__":
    # Build the system from the local dataset folder and trained weights.
    frs = FaceRecognitionSystem(dataset_path=r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces", model_path=r"C:\Users\Divu\Desktop\DADN\detect_face\face_recognition_model_res_net.pth")

    # Mode 1 captures new face images, mode 2 runs live recognition.
    mode = input("Ch·ªçn ch·∫ø ƒë·ªô (1: Ch·ª•p ·∫£nh, 2: Nh·∫≠n di·ªán): ").strip()
    if mode == "1":
        # An empty name makes capture_images fall back to a timestamped folder.
        person_name = input("Nh·∫≠p t√™n ng∆∞·ªùi (ho·∫∑c ƒë·ªÉ tr·ªëng ƒë·ªÉ d√πng timestamp): ").strip()
        frs.capture_images(num_images=10, person_name=person_name or None)
    elif mode == "2":
        frs.recognize_faces()
    else:
        # Report the problem instead of exiting without doing anything.
        print(f"Unknown mode {mode!r}; expected 1 or 2.")