---
```
Class: DPCV - SDH@HK242 - HCMUT
Name: Nguyễn Lê Nhật Minh
Exercise: 02

```
---

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, f1_score

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Tải dataset từ file .npy.
# https://drive.google.com/file/d/1ZgUIHN-tM2-W3zfKZnRA0jsq_EDyfxKm/view?usp=sharing
fer13_path = "./datasets/fer13_v2.npy"
data = np.load(fer13_path, allow_pickle=True).item()

x_train, y_train = data["train"]
x_val, y_val = data["val"]
x_test, y_test = data["test"]

# Kiểm tra kích thước dữ liệu
print(f"Train: {x_train.shape}, {y_train.shape}")
print(f"Val: {x_val.shape}, {y_val.shape}")
print(f"Test: {x_test.shape}, {y_test.shape}")

Train: (28709, 48, 48), (28709,)
Val: (3589, 48, 48), (3589,)
Test: (3589, 48, 48), (3589,)
[[ 70  80  82 ...  52  43  41]
 [ 65  61  58 ...  56  52  44]
 [ 50  43  54 ...  49  56  47]
 ...
 [ 91  65  42 ...  72  56  43]
 [ 77  82  79 ... 105  70  46]
 [ 77  72  84 ... 106 109  82]]


In [21]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=3),  # Chuyển ảnh xám thành 3 kênh
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Chuẩn hóa 3 kênh
])

In [22]:
class FERDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images.astype(np.uint8)  # Chuyển về kiểu uint8
        self.labels = labels.astype(np.int64)  # Chuyển về kiểu int64
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img = self.images[idx]
        label = self.labels[idx]

        if self.transform:
            img = self.transform(img)

        return img, torch.tensor(label, dtype=torch.long)  # Đảm bảo label là torch.long

In [10]:
# Load mô hình ResNet18 đã pretrained trên ImageNet
model = models.resnet18(pretrained=True)

# Thay thế fully connected layer cho bài toán classification với số lớp tương ứng
num_ftrs = model.fc.in_features
num_classes = len(np.unique(y_train))  # Xác định số lượng class từ dữ liệu
model.fc = nn.Linear(num_ftrs, num_classes)

# Chuyển mô hình sang GPU nếu có
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [11]:
for param in model.parameters():
    param.requires_grad = False

# Chỉ cho phép fine-tuning layer cuối cùng
for param in model.fc.parameters():
    param.requires_grad = True

In [12]:
for name, param in model.named_parameters():
    if 'layer4' in name or 'fc' in name:  # Unfreeze layer4 và FC
        param.requires_grad = True
    else:
        param.requires_grad = False

In [23]:
# Định nghĩa Loss function và Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Tạo DataLoader
train_dataset = FERDataset(x_train, y_train, transform=transform)
val_dataset = FERDataset(x_val, y_val, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct, total = 0, 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    train_acc = 100 * correct / total

    # Đánh giá trên validation set
    model.eval()
    correct, total = 0, 0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    val_acc = 100 * correct / total
    f1 = f1_score(all_labels, all_preds, average="macro")
    metric = 0.5 * (val_acc / 100 + f1)

    print(
        f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%, F1: {f1:.4f}, Metric: {metric:.4f}"
    )

Epoch 1/10, Loss: 1074.3124, Train Acc: 54.85%, Val Acc: 59.46%, F1: 0.5375, Metric: 0.5661
Epoch 2/10, Loss: 836.9963, Train Acc: 65.61%, Val Acc: 61.94%, F1: 0.6065, Metric: 0.6129
Epoch 3/10, Loss: 623.5642, Train Acc: 74.52%, Val Acc: 61.86%, F1: 0.5867, Metric: 0.6027
Epoch 4/10, Loss: 399.9234, Train Acc: 83.81%, Val Acc: 63.75%, F1: 0.6205, Metric: 0.6290
Epoch 5/10, Loss: 236.0592, Train Acc: 91.09%, Val Acc: 63.50%, F1: 0.6150, Metric: 0.6250
Epoch 6/10, Loss: 160.2859, Train Acc: 93.97%, Val Acc: 62.19%, F1: 0.6047, Metric: 0.6133
Epoch 7/10, Loss: 118.1186, Train Acc: 95.70%, Val Acc: 62.89%, F1: 0.6061, Metric: 0.6175
Epoch 8/10, Loss: 102.0102, Train Acc: 96.15%, Val Acc: 63.30%, F1: 0.6215, Metric: 0.6273
Epoch 9/10, Loss: 96.9618, Train Acc: 96.54%, Val Acc: 63.05%, F1: 0.6130, Metric: 0.6218
Epoch 10/10, Loss: 81.5107, Train Acc: 96.96%, Val Acc: 63.47%, F1: 0.6245, Metric: 0.6296


In [24]:
test_dataset = FERDataset(x_test, y_test, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model.eval()
correct, total = 0, 0
all_preds, all_labels = [], []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

test_acc = 100 * correct / total
test_f1 = f1_score(all_labels, all_preds, average='macro')
test_metric = 0.5 * (test_acc / 100 + test_f1)

print(f"Test Accuracy: {test_acc:.2f}%, F1 Score: {test_f1:.4f}, Final Metric: {test_metric:.4f}")

Test Accuracy: 64.08%, F1 Score: 0.6305, Final Metric: 0.6357
