In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import os
from PIL import Image
import glob
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report
import time
# Định nghĩa dataset
class FaceRecognitionDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.label_to_idx = {}
        
        folders = [f for f in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, f)) ]
        for idx, folder in enumerate(sorted(folders)):
            self.label_to_idx[folder] = idx
            image_files = glob.glob(os.path.join(root_dir, folder, "*.jpg")) + \
                         glob.glob(os.path.join(root_dir, folder, "*.png"))
            for img_path in image_files:
                self.image_paths.append(img_path)
                self.labels.append(idx)
        
        self.num_classes = len(self.label_to_idx)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]
        img = Image.open(img_path).convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, label

# Hàm huấn luyện mô hình
def train_model(model, dataloader, criterion, optimizer, num_epochs=5, device='cuda'):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(dataloader):.4f}, Accuracy: {accuracy:.2f}%')
    
    # Lưu mô hình
    torch.save(model.state_dict(), 'face_recognition_model_res_net.pth')
    print("✅ Mô hình ResNet18 đã được lưu vào 'face_recognition_model_res_net.pth'")

# Hàm đánh giá mô hình
def evaluate_model(model, dataloader, device='cuda'):
    model.eval()
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    # Tính F1 score, precision, recall
    f1 = f1_score(all_labels, all_preds, average='weighted')
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    
    # In báo cáo chi tiết
    print("Classification Report:")
    label_to_idx = dataloader.dataset.label_to_idx
    idx_to_label = {v: k for k, v in label_to_idx.items()}
    target_names = [idx_to_label[i] for i in range(len(label_to_idx))]
    print(classification_report(all_labels, all_preds, target_names=target_names))
    
    return f1, precision, recall


# Hàm dự đoán và kiểm tra thời gian
def predict(model, image, transform, label_to_idx, device='cuda', threshold=0.7):
    model.eval()
    start_time = time.time()
    with torch.no_grad():
        image = transform(image).unsqueeze(0).to(device)
        output = model(image)
        probabilities = torch.softmax(output, dim=1)
        max_prob, predicted = torch.max(probabilities, 1)
        # Kiểm tra ngưỡng để xác định unknown
        if max_prob.item() < threshold:
            result = "Unknown"
        else:
            # Lấy tên nhãn từ chỉ số
            idx_to_label = {v: k for k, v in label_to_idx.items()}
            result = f"{idx_to_label[predicted.item()]} (Prob: {max_prob.item():.4f})"
    end_time = time.time()
    inference_time = end_time - start_time
    return result, inference_time

# Hàm dự đoán cho toàn bộ ảnh trong một thư mục
def predict_folder(model, folder_path, transform, label_to_idx, device='cuda', threshold=0.8):
    print(f"\nPredicting images in folder: {folder_path}")
    
    # Lấy danh sách tất cả ảnh trong thư mục (hỗ trợ .jpg và .png)
    image_files = glob.glob(os.path.join(folder_path, "*.jpg")) + \
                  glob.glob(os.path.join(folder_path, "*.png"))
    
    if not image_files:
        print("No images found in the folder. Please check the directory.")
        return
    
    # Lưu kết quả dự đoán
    results = []
    total_inference_time = 0.0
    prediction_counts = {}

    for img_path in image_files:
        try:
            img = Image.open(img_path).convert('RGB')
            result, inference_time = predict(model, img, transform, label_to_idx, device, threshold)
            
            # In kết quả cho từng ảnh
            print(f"Image: {os.path.basename(img_path)}")
            print(f"Prediction: {result}")
            print(f"Inference time: {inference_time:.6f} seconds")
            print("-" * 50)
            
            results.append((img_path, result, inference_time))
            total_inference_time += inference_time
            
            # Đếm số lượng dự đoán cho mỗi nhãn
            label = result.split(" (")[0]  # Lấy nhãn (bỏ phần xác suất)
            prediction_counts[label] = prediction_counts.get(label, 0) + 1
            
        except Exception as e:
            print(f"Error processing {img_path}: {e}")
    
    # In tóm tắt
    print("\nSummary:")
    print(f"Total images processed: {len(results)}")
    print(f"Average inference time: {total_inference_time / len(results):.6f} seconds")
    print("\nPrediction counts:")
    for label, count in prediction_counts.items():
        print(f"{label}: {count} images")

# Thiết lập và huấn luyện
if __name__ == "__main__":
    # Thiết lập device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    # Tạo dataset và dataloader cho tập huấn luyện
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # ResNet18 yêu cầu kích thước 224x224
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Chuẩn hóa ImageNet
    ])
    train_dataset = FaceRecognitionDataset(root_dir=r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces", transform=transform)
    train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    # Khởi tạo mô hình ResNet18
    model = models.resnet18(pretrained=False)  # Không sử dụng pretrained
    model.fc = nn.Linear(model.fc.in_features, train_dataset.num_classes)  # Điều chỉnh lớp cuối
    model = model.to(device)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Huấn luyện mô hình
    train_model(model, train_dataloader, criterion, optimizer, num_epochs=10, device=device)

    # Đánh giá trên tập huấn luyện
    print("\nĐánh giá trên tập huấn luyện:")
    f1, precision, recall = evaluate_model(model, train_dataloader, device)
    print(f"F1 Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

    # Tạo dataset và dataloader cho tập test
    test_dataset = FaceRecognitionDataset(root_dir=r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces", transform=transform)
    test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # Đánh giá trên tập test
    print("\nĐánh giá trên tập test:")
    f1, precision, recall = evaluate_model(model, test_dataloader, device)
    print(f"F1 Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")


Epoch 1/10, Loss: 0.1688, Accuracy: 94.33%
Epoch 2/10, Loss: 0.0645, Accuracy: 97.99%
Epoch 3/10, Loss: 0.0499, Accuracy: 98.35%
Epoch 4/10, Loss: 0.0388, Accuracy: 98.61%
Epoch 5/10, Loss: 0.0303, Accuracy: 98.92%
Epoch 6/10, Loss: 0.0131, Accuracy: 99.48%
Epoch 7/10, Loss: 0.0134, Accuracy: 99.69%
Epoch 8/10, Loss: 0.0068, Accuracy: 99.79%
Epoch 9/10, Loss: 0.0022, Accuracy: 99.95%
Epoch 10/10, Loss: 0.0008, Accuracy: 100.00%
✅ Mô hình ResNet18 đã được lưu vào 'face_recognition_model_res_net.pth'

Đánh giá trên tập huấn luyện:
Classification Report:
                      precision    recall  f1-score   support

                Luan       1.00      1.00      1.00        18
temp_20250413_215507       1.00      1.00      1.00         9
temp_20250417_160515       1.00      1.00      1.00        10
               thinh       1.00      1.00      1.00        19
             unknown       1.00      1.00      1.00      1885

            accuracy                           1.00      1941
      

In [2]:

predict_folder_path = r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces\unknown"
predict_folder(model, predict_folder_path, transform, train_dataset.label_to_idx, device=device, threshold=0.9)



Predicting images in folder: C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces\unknown
Image: face_00001_0.png
Prediction: unknown (Prob: 1.0000)
Inference time: 0.016397 seconds
--------------------------------------------------
Image: face_00002_0.png
Prediction: unknown (Prob: 1.0000)
Inference time: 0.002281 seconds
--------------------------------------------------
Image: face_00005_0.png
Prediction: unknown (Prob: 1.0000)
Inference time: 0.003854 seconds
--------------------------------------------------
Image: face_00007_0.png
Prediction: unknown (Prob: 1.0000)
Inference time: 0.000000 seconds
--------------------------------------------------
Image: face_00008_0.png
Prediction: unknown (Prob: 1.0000)
Inference time: 0.000398 seconds
--------------------------------------------------
Image: face_00010_0.png
Prediction: unknown (Prob: 0.9999)
Inference time: 0.006726 seconds
--------------------------------------------------
Image: face_00011_0.png
Prediction: unknown (Prob

In [3]:
predict_folder_path = r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces\temp_20250413_215507"
predict_folder(model, predict_folder_path, transform, train_dataset.label_to_idx, device=device, threshold=0.9)



Predicting images in folder: C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces\temp_20250413_215507
Image: face_00000_0.png
Prediction: temp_20250413_215507 (Prob: 0.9790)
Inference time: 0.005808 seconds
--------------------------------------------------
Image: face_00001_0.png
Prediction: temp_20250413_215507 (Prob: 0.9380)
Inference time: 0.005189 seconds
--------------------------------------------------
Image: face_00002_0.png
Prediction: temp_20250413_215507 (Prob: 0.9804)
Inference time: 0.007254 seconds
--------------------------------------------------
Image: face_00003_0.png
Prediction: temp_20250413_215507 (Prob: 0.9397)
Inference time: 0.005301 seconds
--------------------------------------------------
Image: face_00004_0.png
Prediction: Unknown
Inference time: 0.006061 seconds
--------------------------------------------------
Image: face_00005_0.png
Prediction: temp_20250413_215507 (Prob: 0.9082)
Inference time: 0.008174 seconds
-------------------------------------

In [4]:
predict_folder_path = r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces\temp_20250417_160515"
predict_folder(model, predict_folder_path, transform, train_dataset.label_to_idx, device=device, threshold=0.9)



Predicting images in folder: C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces\temp_20250417_160515
Image: face_00000_0.png
Prediction: temp_20250417_160515 (Prob: 0.9931)
Inference time: 0.003548 seconds
--------------------------------------------------
Image: face_00001_0.png
Prediction: temp_20250417_160515 (Prob: 0.9975)
Inference time: 0.004600 seconds
--------------------------------------------------
Image: face_00002_0.png
Prediction: temp_20250417_160515 (Prob: 0.9993)
Inference time: 0.003346 seconds
--------------------------------------------------
Image: face_00003_0.png
Prediction: temp_20250417_160515 (Prob: 0.9834)
Inference time: 0.003748 seconds
--------------------------------------------------
Image: face_00004_0.png
Prediction: temp_20250417_160515 (Prob: 0.9921)
Inference time: 0.002785 seconds
--------------------------------------------------
Image: face_00005_0.png
Prediction: temp_20250417_160515 (Prob: 0.9685)
Inference time: 0.005434 seconds
---------

In [5]:
predict_folder_path = r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces\thinh"
predict_folder(model, predict_folder_path, transform, train_dataset.label_to_idx, device=device, threshold=0.9)



Predicting images in folder: C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces\thinh
Image: face_00000.png
Prediction: thinh (Prob: 0.9359)
Inference time: 0.016117 seconds
--------------------------------------------------
Image: face_00001.png
Prediction: Unknown
Inference time: 0.011426 seconds
--------------------------------------------------
Image: face_00002.png
Prediction: thinh (Prob: 0.9481)
Inference time: 0.006453 seconds
--------------------------------------------------
Image: face_00003.png
Prediction: thinh (Prob: 0.9263)
Inference time: 0.009774 seconds
--------------------------------------------------
Image: face_00004.png
Prediction: Unknown
Inference time: 0.011977 seconds
--------------------------------------------------
Image: face_00005.png
Prediction: thinh (Prob: 0.9743)
Inference time: 0.018184 seconds
--------------------------------------------------
Image: face_00006.png
Prediction: thinh (Prob: 0.9993)
Inference time: 0.012105 seconds
-------------

In [None]:
import os
import cv2
import time
from datetime import datetime
import face_recognition
import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from torchvision import transforms, models

def predict(model, image, transform, label_to_idx, device='cuda', threshold=0.5):
    model.eval()
    start_time = time.time()
    with torch.no_grad():
        image = transform(image).unsqueeze(0).to(device)
        output = model(image)
        probabilities = torch.softmax(output, dim=1)
        max_prob, predicted = torch.max(probabilities, 1)
        if max_prob.item() < threshold:
            result = "Unknown"
        else:
            idx_to_label = {v: k for k, v in label_to_idx.items()}
            result = f"{idx_to_label[predicted.item()]} (Prob: {max_prob.item():.4f})"
    end_time = time.time()
    inference_time = end_time - start_time
    return result, inference_time

class FaceRecognitionSystem:
    def __init__(self, dataset_path=r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces", detection_method="hog", model_path="face_recognition_model_res_net.pth"):
        self.dataset_path = dataset_path
        self.detection_method = detection_method
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        
        # Transform cho ResNet18
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),  # ResNet18 yêu cầu kích thước 224x224
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Chuẩn hóa ImageNet
        ])
        
        # Tạo danh sách nhãn
        self.label_to_idx = {}
        folders = [f for f in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, f))]
        for idx, folder in enumerate(sorted(folders)):
            self.label_to_idx[folder] = idx
        self.num_classes = len(self.label_to_idx)
        
        # Khởi tạo mô hình ResNet18
        self.model = models.resnet18(pretrained=False)  # Không sử dụng pretrained
        self.model.fc = nn.Linear(self.model.fc.in_features, self.num_classes)  # Điều chỉnh lớp cuối
        self.model = self.model.to(self.device)
        
        # Tải trạng thái mô hình
        try:
            self.model.load_state_dict(torch.load(model_path, map_location=self.device))
            self.model.eval()
            print(f"✅ Mô hình ResNet18 đã được tải từ {model_path}")
        except Exception as e:
            print(f"❌ Lỗi khi tải mô hình: {e}")
            print("Vui lòng huấn luyện lại mô hình ResNet18 với dataset hiện tại.")

    def draw_rectangles(self, frame, top, right, bottom, left, label="Face"):
        padding = 20
        top = max(0, top - padding)
        left = max(0, left - padding)
        right = min(frame.shape[1], right + padding)
        bottom = min(frame.shape[0], bottom + padding)
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
        cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        return frame

    def capture_images(self, num_images=10, person_name=None):
        if person_name:
            output_dir = os.path.join(self.dataset_path, person_name)
        else:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_dir = os.path.join(self.dataset_path, f"person_{timestamp}")
        
        os.makedirs(output_dir, exist_ok=True)
        print(f"📁 Lưu ảnh khuôn mặt vào thư mục: {output_dir}")

        video = cv2.VideoCapture(0)
        if not video.isOpened():
            print("❌ Không thể mở webcam")
            return

        print(f"🚀 Bắt đầu chụp {num_images} ảnh khuôn mặt...")
        count = 0
        while count < num_images:
            ret, frame = video.read()
            if not ret:
                print("❌ Không thể lấy khung hình từ webcam")
                break

            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            face_locations = face_recognition.face_locations(rgb_frame, model=self.detection_method)

            if face_locations:
                top, right, bottom, left = face_locations[0]
                padding = 0
                top = max(0, top - padding)
                left = max(0, left - padding)
                right = min(frame.shape[1], right + padding)
                bottom = min(frame.shape[0], bottom + padding)

                face_img = frame[top:bottom, left:right]
                face_img_rgb = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
                face_img_pil = Image.fromarray(face_img_rgb)

                result, _ = predict(self.model, face_img_pil, self.transform, self.label_to_idx, self.device)
                frame = self.draw_rectangles(frame, top, right, bottom, left, label=result)

                filename = f"face_{count:05d}.png"
                filepath = os.path.join(output_dir, filename)
                cv2.imwrite(filepath, face_img)
                print(f"📸 Đã lưu khuôn mặt thứ {count + 1}/{num_images}: {filepath}")
                count += 1
            else:
                print(f"⚠️ Không phát hiện khuôn mặt trong khung hình thứ {count + 1}")
                cv2.imshow("Face Detection", frame)

            cv2.imshow("Face Detection", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                print("🛑 Người dùng đã thoát")
                break
            time.sleep(1)

        video.release()
        cv2.destroyAllWindows()
        print(f"✅ Hoàn tất! Đã lưu {count} ảnh khuôn mặt vào {output_dir}")

    def recognize_faces(self):
        video = cv2.VideoCapture(0)
        if not video.isOpened():
            print("❌ Không thể mở webcam")
            return

        print("🚀 Bắt đầu nhận diện khuôn mặt từ webcam...")
        while True:
            ret, frame = video.read()
            if not ret:
                print("❌ Không thể lấy khung hình từ webcam")
                break

            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            face_locations = face_recognition.face_locations(rgb_frame, model=self.detection_method)

            for top, right, bottom, left in face_locations:
                padding = 0
                top_padded = max(0, top - padding)
                left_padded = max(0, left - padding)
                right_padded = min(frame.shape[1], right + padding)
                bottom_padded = min(frame.shape[0], bottom + padding)

                face_img = frame[top_padded:bottom_padded, left_padded:right_padded]
                face_img_rgb = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
                face_img_pil = Image.fromarray(face_img_rgb)

                result, _ = predict(self.model, face_img_pil, self.transform, self.label_to_idx, self.device)
                frame = self.draw_rectangles(frame, top, right, bottom, left, label=result)

            cv2.imshow("Face Recognition", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                print("🛑 Người dùng đã thoát")
                break

        video.release()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    frs = FaceRecognitionSystem(dataset_path=r"C:\Users\Divu\Desktop\DADN\detect_face\extracted_faces", model_path=r"C:\Users\Divu\Desktop\DADN\detect_face\face_recognition_model_res_net.pth")
    mode = input("Chọn chế độ (1: Chụp ảnh, 2: Nhận diện): ").strip()
    if mode == "1":
        person_name = input("Nhập tên người (hoặc để trống để dùng timestamp): ").strip()
        frs.capture_images(num_images=10, person_name=person_name if person_name else None)
    elif mode == "2":
        frs.recognize_faces()



✅ Mô hình ResNet18 đã được tải từ C:\Users\Divu\Desktop\DADN\detect_face\face_recognition_model_res_net.pth
🚀 Bắt đầu nhận diện khuôn mặt từ webcam...
🛑 Người dùng đã thoát
