In [1]:
import os
import random
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import resnet18 as torchvision_resnet18, ResNet18_Weights

In [2]:
class TripletFaceDataset(Dataset):
    """Dataset that samples random (anchor, positive, negative) face triplets.

    ``root_dir`` must contain one sub-directory per identity, each holding that
    person's image files. The anchor and the positive are two different images
    of the same identity; the negative is an image of a different identity.

    Attributes:
        people: identities that own at least one image (negative candidates).
        anchor_people: identities that own at least two images (anchor candidates).
        image_paths: mapping ``identity -> list of image file names``.

    Raises:
        ValueError: if fewer than two usable identities exist, or if no identity
            has the two images needed to form an anchor/positive pair.
    """

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform

        # Keep only identity folders and the regular files inside them, so stray
        # entries next to the folders (or nested folders) are never sampled.
        self.image_paths = {}
        for person in sorted(os.listdir(root_dir)):
            person_dir = os.path.join(root_dir, person)
            if not os.path.isdir(person_dir):
                continue
            files = sorted(
                name for name in os.listdir(person_dir)
                if os.path.isfile(os.path.join(person_dir, name))
            )
            if files:
                self.image_paths[person] = files

        self.people = list(self.image_paths)
        # random.sample(..., 2) needs at least two images, so only these
        # identities may be used as the anchor/positive source.
        self.anchor_people = [p for p in self.people if len(self.image_paths[p]) >= 2]

        # Fail early: with a single identity the negative search could never
        # terminate, and without an anchor candidate no triplet can be built.
        if len(self.people) < 2 or not self.anchor_people:
            raise ValueError(
                f"{root_dir!r} must contain at least two identities with images, "
                "one of which has two or more images"
            )

    def __len__(self):
        # Triplets are drawn at random, so this is a nominal epoch length.
        return 10000

    def _load_image(self, person, file_name):
        """Open one image as RGB and apply the optional transform."""
        with Image.open(os.path.join(self.root_dir, person, file_name)) as img:
            rgb = img.convert("RGB")
        if self.transform:
            rgb = self.transform(rgb)
        return rgb

    def __getitem__(self, idx):
        """Return a random ``(anchor, positive, negative)`` triplet; ``idx`` is ignored."""
        person = random.choice(self.anchor_people)
        anchor_name, positive_name = random.sample(self.image_paths[person], 2)

        # Rejection sampling terminates because __init__ guarantees >= 2 identities.
        negative_person = random.choice(self.people)
        while negative_person == person:
            negative_person = random.choice(self.people)
        negative_name = random.choice(self.image_paths[negative_person])

        anchor = self._load_image(person, anchor_name)
        positive = self._load_image(person, positive_name)
        negative = self._load_image(negative_person, negative_name)
        return anchor, positive, negative



In [3]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a skip connection.

    Args:
        in_planes: number of input channels.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input before it is added
            back; when ``None`` the input is added unchanged.
    """

    # Channel multiplier read by the network builder (planes * expansion).
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, downsample=None):
        super().__init__()
        conv3x3 = dict(kernel_size=3, padding=1, bias=False)
        self.conv1 = nn.Conv2d(in_planes, planes, stride=stride, **conv3x3)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, stride=1, **conv3x3)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        y = self.conv1(x)
        y = self.bn1(y)
        y = self.relu(y)
        y = self.conv2(y)
        y = self.bn2(y)

        y += shortcut
        return self.relu(y)



In [4]:
class CustomResNet18(nn.Module):
    """ResNet-18 style backbone that outputs L2-normalised embeddings.

    Args:
        num_classes: size of the output vector produced by the final linear
            layer (used here as the embedding dimension).
    """

    def __init__(self, num_classes=128):
        super().__init__()
        # Channel count of the tensor entering the next stage; _make_layer updates it.
        self.inplanes = 64

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max-pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages of two blocks each.
        self.layer1 = self._make_layer(BasicBlock, 64, 2)
        self.layer2 = self._make_layer(BasicBlock, 128, 2, stride=2)
        self.layer3 = self._make_layer(BasicBlock, 256, 2, stride=2)
        self.layer4 = self._make_layer(BasicBlock, 512, 2, stride=2)

        # Head: global average pooling and a linear projection.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks with ``planes`` channels.

        The first block receives ``stride`` and, when the stride or the channel
        count changes, a 1x1 convolution + batch-norm projection for its skip path.
        """
        out_planes = planes * block.expansion
        if stride != 1 or self.inplanes != out_planes:
            projection = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            projection = None

        stage = [block(self.inplanes, planes, stride, projection)]
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return one unit-length embedding per input image."""
        stem = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        features = stem
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)

        pooled = torch.flatten(self.avgpool(features), 1)
        return F.normalize(self.fc(pooled), p=2, dim=1)



In [5]:
def load_pretrained_weights(custom_model, pretrained_model):
    """Copy every compatible tensor from ``pretrained_model`` into ``custom_model``.

    A tensor is compatible when its state-dict key exists in ``custom_model``
    and both tensors have the same size. All other entries of ``custom_model``
    keep their current values. Prints how many tensors were copied.
    """
    target_state = custom_model.state_dict()

    transferable = {}
    for name, tensor in pretrained_model.state_dict().items():
        if name not in target_state:
            continue
        if tensor.size() != target_state[name].size():
            continue
        transferable[name] = tensor

    for name, tensor in transferable.items():
        target_state[name] = tensor
    custom_model.load_state_dict(target_state)
    print(f" Loaded {len(transferable)} pretrained weights.")



In [6]:
class TripletNetwork(nn.Module):
    """Embedding network: a ``CustomResNet18`` seeded with ImageNet ResNet-18 weights."""

    def __init__(self):
        super().__init__()
        backbone = CustomResNet18()
        # Only tensors whose name and shape match are copied by the loader.
        imagenet_resnet = torchvision_resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
        load_pretrained_weights(backbone, imagenet_resnet)
        self.embedding = backbone

    def forward(self, x):
        """Return the backbone's embedding for the batch ``x``."""
        return self.embedding(x)



In [7]:
class TripletLoss(nn.Module):
    """Hinge-style triplet loss on Euclidean distances.

    For each triplet the loss is ``max(0, d(a, p) - d(a, n) + margin)``; the
    batch mean is returned.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        """Return the mean triplet loss over the batch as a scalar tensor."""
        d_anchor_pos, d_anchor_neg = (
            F.pairwise_distance(anchor, other, p=2) for other in (positive, negative)
        )
        hinge = F.relu(d_anchor_pos - d_anchor_neg + self.margin)
        return hinge.mean()



In [8]:
def compute_accuracy(anchor, positive, negative):
    """Return the fraction of triplets whose positive is strictly closer than the negative.

    Distances are Euclidean; the result is ``correct / batch_size`` as a Python float.
    """
    d_pos = F.pairwise_distance(anchor, positive, p=2)
    d_neg = F.pairwise_distance(anchor, negative, p=2)
    hits = torch.lt(d_pos, d_neg).sum().item()
    return hits / anchor.size(0)



In [9]:
def evaluate(model, dataloader, criterion):
    """Compute the sample-weighted mean loss and triplet accuracy over ``dataloader``.

    The model is switched to eval mode (and left there) and gradients are
    disabled. Batches are moved to the device that holds the model's
    parameters, so the function works for CPU models and for any GPU index.

    Args:
        model: module mapping an image batch to embeddings.
        dataloader: iterable yielding ``(anchor, positive, negative)`` batches.
        criterion: callable ``(anchor_emb, positive_emb, negative_emb) -> scalar loss``.

    Returns:
        ``(avg_loss, avg_acc)``. Both are ``nan`` when the dataloader yields no
        samples, because the averages are undefined in that case.
    """
    model.eval()

    # Follow the model's own device; fall back to CUDA for parameterless models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    total_loss = 0.0
    total_correct = 0
    total_samples = 0
    with torch.no_grad():
        for anchor, positive, negative in dataloader:
            anchor = anchor.to(device)
            positive = positive.to(device)
            negative = negative.to(device)

            anchor_emb = model(anchor)
            positive_emb = model(positive)
            negative_emb = model(negative)
            loss = criterion(anchor_emb, positive_emb, negative_emb)
            acc = compute_accuracy(anchor_emb, positive_emb, negative_emb)

            # Weight by batch size so a smaller final batch does not skew the mean.
            batch_size = anchor.size(0)
            total_loss += loss.item() * batch_size
            total_correct += acc * batch_size
            total_samples += batch_size

    if total_samples == 0:
        return float("nan"), float("nan")

    avg_loss = total_loss / total_samples
    avg_acc = total_correct / total_samples
    return avg_loss, avg_acc



In [21]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image, ImageTk
import tkinter as tk
from tkinter import filedialog
from torchvision import transforms, models

# === TripletNet model definition ===
class TripletNet(nn.Module):
    """torchvision ResNet-18 whose final layer is replaced by an embedding head.

    Args:
        embedding_size: dimension of the embedding produced by the new ``fc`` layer.
    """

    def __init__(self, embedding_size=128):
        super(TripletNet, self).__init__()
        # weights=None builds an untrained network; it replaces the deprecated
        # pretrained=False spelling. Trained weights are loaded from a checkpoint later.
        self.embedding = models.resnet18(weights=None)
        in_features = self.embedding.fc.in_features
        self.embedding.fc = nn.Linear(in_features, embedding_size)

    def forward_once(self, x):
        """Embed one batch and L2-normalise each embedding."""
        x = self.embedding(x)
        return F.normalize(x, p=2, dim=1)  # L2 normalisation

    def forward(self, anchor, positive, negative):
        """Embed the three batches of a triplet with the shared network."""
        anchor_out = self.forward_once(anchor)
        positive_out = self.forward_once(positive)
        negative_out = self.forward_once(negative)
        return anchor_out, positive_out, negative_out

# === Compute embeddings for the image gallery ===
def build_gallery_embeddings(model, gallery_dir, transform):
    """Embed every ``.jpg``/``.jpeg``/``.png`` image found under ``gallery_dir``.

    ``gallery_dir`` is scanned for one sub-directory per person. Images that
    cannot be processed are reported with ``print`` and skipped, so one bad file
    does not abort the whole gallery.

    Args:
        model: module exposing ``forward_once(batch)``; it is put in eval mode.
        gallery_dir: directory containing one folder per person.
        transform: callable turning an RGB PIL image into a tensor.

    Returns:
        dict mapping ``"<person_name>_<img_name>"`` to ``(person_name, embedding)``.
    """
    model.eval()

    # Run on whatever device holds the model instead of assuming CUDA;
    # fall back to CUDA only for parameterless models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    gallery_embeddings = {}

    for person_name in os.listdir(gallery_dir):
        person_dir = os.path.join(gallery_dir, person_name)
        if not os.path.isdir(person_dir):
            continue

        for img_name in os.listdir(person_dir):
            img_path = os.path.join(person_dir, img_name)
            if not img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            try:
                # The context manager closes the file as soon as it is decoded.
                with Image.open(img_path) as img:
                    img_tensor = transform(img.convert("RGB")).unsqueeze(0).to(device)
                with torch.no_grad():
                    emb = model.forward_once(img_tensor)

                # Key: "<person>_<file name>"; value: (person, embedding).
                key = f"{person_name}_{img_name}"
                gallery_embeddings[key] = (person_name, emb)

            except Exception as e:
                # Deliberately best-effort: report the failure and keep going.
                print(f"Lỗi xử lý ảnh {img_path}: {e}")

    return gallery_embeddings
# === Load model & gallery ===
# Resize to 224x224 and normalise with the given per-channel mean/std.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Prefer the GPU, but fall back to the CPU so the cell also runs without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = TripletNet().to(device)
# map_location lets a GPU-saved checkpoint load on any device; weights_only=True
# restricts unpickling to tensors/containers, which is all a state dict needs.
state_dict = torch.load("./final_triplet_resnet18_ss2.pth", map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.eval()

gallery_dir = "./DATA"
gallery_embeddings = build_gallery_embeddings(model, gallery_dir, transform)



  model.load_state_dict(torch.load("./final_triplet_resnet18_ss2.pth"))


In [22]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image, ImageTk
import tkinter as tk
from tkinter import filedialog
from torchvision import transforms, models
import cv2
import face_recognition
import numpy as np

# === Compute one embedding per individual gallery image ===
def build_individual_gallery_embeddings(model, gallery_dir, transform):
    """Embed every ``.jpg``/``.jpeg``/``.png`` image found under ``gallery_dir``.

    ``gallery_dir`` is scanned for one sub-directory per person. Images that
    cannot be processed are reported with ``print`` and skipped.

    NOTE(review): this has the same signature and logic as
    ``build_gallery_embeddings`` from the previous cell; consider keeping only one.

    Args:
        model: module exposing ``forward_once(batch)``; it is put in eval mode.
        gallery_dir: directory containing one folder per person.
        transform: callable turning an RGB PIL image into a tensor.

    Returns:
        dict mapping ``"<person_name>_<img_name>"`` to ``(person_name, embedding)``.
    """
    model.eval()

    # Run on whatever device holds the model instead of assuming CUDA;
    # fall back to CUDA only for parameterless models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    gallery_embeddings = {}

    for person_name in os.listdir(gallery_dir):
        person_dir = os.path.join(gallery_dir, person_name)
        if not os.path.isdir(person_dir):
            continue

        for img_name in os.listdir(person_dir):
            img_path = os.path.join(person_dir, img_name)
            if not img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            try:
                # The context manager closes the file as soon as it is decoded.
                with Image.open(img_path) as img:
                    img_tensor = transform(img.convert("RGB")).unsqueeze(0).to(device)
                with torch.no_grad():
                    emb = model.forward_once(img_tensor)

                # Key: "<person>_<file name>"; value: (person, embedding).
                key = f"{person_name}_{img_name}"
                gallery_embeddings[key] = (person_name, emb)

            except Exception as e:
                # Deliberately best-effort: report the failure and keep going.
                print(f"Lỗi xử lý ảnh {img_path}: {e}")

    return gallery_embeddings

# === Identify an image against every individual gallery embedding ===
def identify_image(image_path, model, gallery_embeddings, transform, threshold=0.4):
    """Identify the person in ``image_path`` by nearest-neighbour search.

    Args:
        image_path: path of the query image.
        model: module exposing ``forward_once(batch)``.
        gallery_embeddings: dict ``key -> (person_name, embedding)``.
        transform: callable turning an RGB PIL image into a tensor.
        threshold: the nearest distance must be strictly below this value for
            the match to be accepted.

    Returns:
        ``(identity, min_dist, matched_image)``. ``identity`` is ``"unknown"``
        and ``matched_image`` is ``""`` when no gallery entry is close enough;
        ``min_dist`` is ``inf`` for an empty gallery.
    """
    # Run on whatever device holds the model instead of assuming CUDA;
    # fall back to CUDA only for parameterless models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    with Image.open(image_path) as img:
        img_tensor = transform(img.convert("RGB")).unsqueeze(0).to(device)

    with torch.no_grad():
        query_emb = model.forward_once(img_tensor)

    min_dist = float("inf")
    identity = "unknown"
    matched_image = ""

    for key, (person_name, emb) in gallery_embeddings.items():
        # .to() is a no-op when the gallery already lives on the query's device.
        dist = F.pairwise_distance(query_emb, emb.to(query_emb.device)).item()
        if dist < min_dist:
            min_dist = dist
            identity = person_name
            matched_image = key

    if min_dist >= threshold:
        identity = "unknown"
        matched_image = ""

    return identity, min_dist, matched_image

# === Combined approach (voting) ===
def identify_image_voting(image_path, model, gallery_embeddings, transform, threshold=0.4, top_k=5):
    """Identify a person by majority vote among the ``top_k`` nearest gallery images.

    Only neighbours whose distance is strictly below ``threshold`` may vote.
    Ties are won by the identity whose first vote came from the nearer image.

    Args:
        image_path: path of the query image.
        model: module exposing ``forward_once(batch)``.
        gallery_embeddings: dict ``key -> (person_name, embedding)``.
        transform: callable turning an RGB PIL image into a tensor.
        threshold: maximum (exclusive) distance for a neighbour to vote.
        top_k: number of nearest neighbours considered.

    Returns:
        ``(identity, distance, matched_image)``. On success ``distance`` and
        ``matched_image`` describe the nearest gallery image *of the winning
        identity*, so the three values always refer to the same person. When
        nobody votes the result is ``("unknown", nearest_distance, "")``, with
        ``inf`` as the distance for an empty gallery.
    """
    # Run on whatever device holds the model instead of assuming CUDA;
    # fall back to CUDA only for parameterless models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    with Image.open(image_path) as img:
        img_tensor = transform(img.convert("RGB")).unsqueeze(0).to(device)

    with torch.no_grad():
        query_emb = model.forward_once(img_tensor)

    # Distance to every gallery entry, nearest first.
    distances = []
    for key, (person_name, emb) in gallery_embeddings.items():
        dist = F.pairwise_distance(query_emb, emb.to(query_emb.device)).item()
        distances.append((dist, person_name, key))
    distances.sort(key=lambda entry: entry[0])

    # Let the top-k neighbours vote, ignoring those that are too far away.
    votes = {}
    for dist, person_name, _ in distances[:max(top_k, 0)]:
        if dist < threshold:
            votes[person_name] = votes.get(person_name, 0) + 1

    if not votes:
        return "unknown", distances[0][0] if distances else float("inf"), ""

    # max() keeps the first maximal key, i.e. the identity that voted earliest.
    identity = max(votes, key=votes.get)

    # Report the winner's own nearest image rather than the global nearest one,
    # which may belong to a different person who lost the vote.
    best_dist, _, best_key = next(entry for entry in distances if entry[1] == identity)
    return identity, best_dist, best_key

# === Recognise the faces in one camera frame ===
def identify_frame(frame, model, gallery_embeddings, transform, threshold=0.7):
    """Detect the faces in an OpenCV frame and identify each one.

    Args:
        frame: BGR image as delivered by OpenCV.
        model: module exposing ``forward_once(batch)``.
        gallery_embeddings: dict ``key -> (person_name, embedding)``.
        transform: callable turning an RGB PIL image into a tensor.
        threshold: the nearest distance must be strictly below this value for
            the match to be accepted.

    Returns:
        ``(rgb_frame, face_locations, results)`` where ``results`` holds one
        ``(identity, min_dist, (top, right, bottom, left))`` tuple per detected
        face, in the same order as ``face_locations``.
    """
    # Run on whatever device holds the model instead of assuming CUDA;
    # fall back to CUDA only for parameterless models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    # Convert the OpenCV frame (BGR) to RGB.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Locate the faces.
    face_locations = face_recognition.face_locations(rgb_frame)

    results = []
    for (top, right, bottom, left) in face_locations:
        # Crop the face region.
        face_image = rgb_frame[top:bottom, left:right]
        face_pil = Image.fromarray(face_image).convert("RGB")
        face_tensor = transform(face_pil).unsqueeze(0).to(device)

        with torch.no_grad():
            query_emb = model.forward_once(face_tensor)

        min_dist = float("inf")
        identity = "unknown"

        for key, (person_name, emb) in gallery_embeddings.items():
            # .to() is a no-op when the gallery already lives on the query's device.
            dist = F.pairwise_distance(query_emb, emb.to(query_emb.device)).item()
            if dist < min_dist:
                min_dist = dist
                identity = person_name

        if min_dist >= threshold:
            identity = "unknown"

        results.append((identity, min_dist, (top, right, bottom, left)))

    return rgb_frame, face_locations, results

# === Show a frame with face boxes and labels ===
def display_frame(frame, face_locations, results):
    """Draw the recognition results on ``frame`` and show it in the Tk window.

    ``frame`` is modified in place. The module-level widgets ``image_label`` and
    ``result_text`` are updated; the text shows the first face only.

    Args:
        frame: RGB image array to annotate.
        face_locations: ``(top, right, bottom, left)`` boxes, one per face.
        results: ``(identity, dist, location)`` tuples aligned with ``face_locations``.
    """
    for (top, right, bottom, left), (identity, dist, _) in zip(face_locations, results):
        # Draw a rectangle around the face.
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
        # Caption above the rectangle; clamp the baseline so the text stays
        # inside the image when the face touches the top edge.
        label = f"{identity} ({dist:.3f})"
        label_y = max(top - 10, 15)
        cv2.putText(frame, label, (left, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Convert the frame to a PIL image so Tkinter can display it.
    frame_pil = Image.fromarray(frame)
    frame_pil = frame_pil.resize((500, 500), Image.Resampling.LANCZOS)
    frame_tk = ImageTk.PhotoImage(frame_pil)
    image_label.config(image=frame_tk)
    # Keep a reference on the widget so the photo is not garbage-collected.
    image_label.image = frame_tk

    # Update the recognition result text.
    if results:
        identity, dist, _ = results[0]  # show the result of the first face
        result_text.set(f"Dự đoán: {identity}\nKhoảng cách: {dist:.3f}")
    else:
        result_text.set("Không phát hiện khuôn mặt")

# === Start the camera and run real-time recognition ===
def start_camera():
    """Open camera 0 and start a Tk-scheduled recognition loop.

    Does nothing when the camera is already running. Uses the module-level
    ``cap`` / ``is_camera_running`` state together with ``model``,
    ``gallery_embeddings``, ``transform``, ``root`` and ``result_text``.
    """
    global cap, is_camera_running
    if is_camera_running:
        return

    capture = cv2.VideoCapture(0)
    if not capture.isOpened():
        # Release the failed handle and leave the shared state untouched.
        capture.release()
        result_text.set("Lỗi: Không thể mở camera")
        return

    cap = capture
    is_camera_running = True

    def update_frame():
        """Process one frame, then reschedule itself while the camera runs."""
        if not is_camera_running:
            return
        ret, frame = cap.read()
        if not ret:
            # Stop first: stop_camera() writes its own status text, which would
            # otherwise overwrite the error message below.
            stop_camera()
            result_text.set("Lỗi: Không thể đọc frame từ camera")
            return

        # Recognise the faces and draw their boxes.
        rgb_frame, face_locations, results = identify_frame(frame, model, gallery_embeddings, transform)
        # Show the frame.
        display_frame(rgb_frame, face_locations, results)
        # Schedule the next update.
        root.after(50, update_frame)

    update_frame()

# === Stop the camera ===
def stop_camera():
    """Stop the recognition loop, release the capture and clear the preview.

    Does nothing when the camera is not running.
    """
    global cap, is_camera_running
    if not is_camera_running:
        return

    # Clearing the flag makes any pending update_frame callback exit early.
    is_camera_running = False
    if cap is not None:
        cap.release()

    image_label.config(image="")
    image_label.image = None
    result_text.set("Camera đã dừng")

# === Main interface ===
def upload_and_identify():
    """Let the user pick an image, preview it and show the predicted identity.

    The result text is coloured green when the file name (without extension)
    starts with the predicted identity, case-insensitively, and red otherwise.
    """
    file_path = filedialog.askopenfilename(filetypes=[("Image files", "*.jpg *.jpeg *.png")])
    if not file_path:
        return

    # Preview: shrink in place to fit within 300x300.
    preview = Image.open(file_path).convert("RGB")
    preview.thumbnail((300, 300))
    preview_tk = ImageTk.PhotoImage(preview)
    image_label.config(image=preview_tk)
    # Keep a reference on the widget so the photo is not garbage-collected.
    image_label.image = preview_tk

    identity, _dist, _matched_image = identify_image(file_path, model, gallery_embeddings, transform)
    image_name = os.path.splitext(os.path.basename(file_path))[0]

    name_matches = image_name.lower().startswith(identity.lower())
    result_label.config(fg="green" if name_matches else "red")

    result_text.set(
        f"Tên ảnh: {image_name}\n"
        f"Dự đoán: {identity}\n"
    )

# def batch_test():
#     folder_path = filedialog.askdirectory(title="Chọn thư mục chứa ảnh để kiểm tra")
#     if not folder_path:
#         return

#     correct = 0
#     total = 0
#     log_lines = []

#     for filename in os.listdir(folder_path):
#         if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
#             file_path = os.path.join(folder_path, filename)
#             image_name = os.path.splitext(filename)[0]
#             identity, dist, matched_image = identify_image(file_path, model, gallery_embeddings, transform)

#             if image_name.lower().startswith(identity.lower()):
#                 correctness = "Đúng"
#                 correct += 1
#             else:
#                 correctness = "Sai"
#             total += 1
#             log_lines.append(f"{filename} --> Dự đoán: {identity}, Kết quả: {correctness}")

#     summary = f"\nTổng: {total} ảnh | Đúng: {correct} | Sai: {total - correct}"
#     log_lines.append(summary)

#     result_text.set(summary)
#     print("\n".join(log_lines))

# === Build the user interface ===
root = tk.Tk()
root.title("Hệ thống Nhận diện Khuôn mặt")
root.geometry("1200x700")
root.configure(bg="#e6ecf0")

# Title
title = tk.Label(root, text="HỆ THỐNG NHẬN DIỆN KHUÔN MẶT", font=("Helvetica", 20, "bold"), bg="#e6ecf0", fg="#2c3e50")
title.pack(pady=20)

# Frame holding the buttons
button_frame = tk.Frame(root, bg="#e6ecf0")
button_frame.pack(pady=10)

# Options shared by the three action buttons
button_style = dict(
    font=("Helvetica", 14),
    fg="white",
    width=20,
    padx=10,
    pady=8,
    relief="flat",
)

# Buttons
upload_btn = tk.Button(
    button_frame,
    text="Chọn ảnh để nhận diện",
    bg="#3498db",  # light blue
    activebackground="#2980b9",
    command=upload_and_identify,
    **button_style,
)
upload_btn.pack(side="left", padx=10)

camera_btn = tk.Button(
    button_frame,
    text="Bật Camera",
    bg="#1abc9c",  # turquoise
    activebackground="#16a085",
    command=start_camera,
    **button_style,
)
camera_btn.pack(side="left", padx=10)

stop_camera_btn = tk.Button(
    button_frame,
    text="Dừng Camera",
    bg="#e74c3c",  # light red
    activebackground="#c0392b",
    command=stop_camera,
    **button_style,
)
stop_camera_btn.pack(side="left", padx=10)

# Frame that displays the image
frame_image = tk.Frame(root, width=500, height=500, bg="white", bd=3, relief="ridge")
frame_image.pack(pady=20)
image_label = tk.Label(frame_image, bg="white")
image_label.pack()

# Recognition result
result_text = tk.StringVar()
result_label = tk.Label(root, textvariable=result_text, font=("Helvetica", 16), bg="#e6ecf0", fg="#2c3e50", justify="center")
result_label.pack(pady=20)

# Batch-test button (disabled)
# batch_btn = tk.Button(
#     root, 
#     text="Kiểm tra cả thư mục ảnh", 
#     font=("Helvetica", 14), 
#     bg="#2ecc71",  # green
#     fg="white", 
#     width=25, 
#     padx=10, 
#     pady=8, 
#     relief="flat",
#     activebackground="#27ae60",
#     command=batch_test
# )
# batch_btn.pack(pady=10)

# Camera state shared by start_camera / stop_camera
cap = None
is_camera_running = False


def on_close():
    """Release the camera (if it is running) before destroying the window."""
    stop_camera()
    root.destroy()


# Without this handler, closing the window while the camera runs would leave
# the capture device open for as long as the kernel stays alive.
root.protocol("WM_DELETE_WINDOW", on_close)

root.mainloop()