In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader, random_split, Subset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [2]:
!mkdir emotion-recognition-model

In [3]:
# Train the model
def _train_one_epoch(model, device, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader`` and return the mean batch loss."""
    model.train()
    running_loss = 0.0
    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # max(..., 1) avoids a ZeroDivisionError on an empty loader
    return running_loss / max(len(loader), 1)


def _evaluate(model, device, loader, criterion):
    """Evaluate ``model`` on ``loader`` without gradients.

    Returns a tuple ``(mean_batch_loss, all_labels, all_preds)`` where the two
    lists hold the target and predicted class index of every evaluated sample.
    """
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0.0
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            total_loss += criterion(outputs, labels).item()
            # predicted class = index of the largest output along dim 1
            preds = outputs.argmax(dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())
    return total_loss / max(len(loader), 1), all_labels, all_preds


def training_model(model, device, dataloader: list, criterion, optimizer, num_epoch, patience=0, save_path=None):
    """Train ``model`` and evaluate it after every epoch.

    Args:
        model: network to optimise; it is modified in place.
        device: device the batches are moved to before the forward pass.
        dataloader: two-element sequence ``[train_loader, eval_loader]``.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepping the trainable parameters.
        num_epoch: maximum number of epochs to run.
        patience: when non-zero, the best accuracy is tracked, a checkpoint
            ``best_model_<epoch>e.pth`` is written on every improvement (if
            ``save_path`` is set) and training stops once ``patience`` epochs
            in total have passed without improvement (the counter is reset on
            each improvement). ``0`` disables both.
        save_path: directory for checkpoints; nothing is written when falsy.

    Returns:
        The same ``model`` object after training.
    """
    import os  # local import: only needed to create the checkpoint directory

    print("Start Training...")
    if save_path:
        # torch.save does not create missing directories
        os.makedirs(save_path, exist_ok=True)

    train_loader, eval_loader = dataloader[0], dataloader[1]
    best_acc = 0.0
    best_epoch = 0
    counter = 0
    # Initialised up-front so the final report is well-defined even if no epoch runs
    all_labels, all_preds = [], []

    for epoch in range(num_epoch):
        train_loss = _train_one_epoch(model, device, train_loader, criterion, optimizer)
        test_loss, all_labels, all_preds = _evaluate(model, device, eval_loader, criterion)

        acc = accuracy_score(all_labels, all_preds)
        prec = precision_score(all_labels, all_preds, average='weighted', zero_division=0)
        rec = recall_score(all_labels, all_preds, average='weighted', zero_division=0)
        f1 = f1_score(all_labels, all_preds, average='weighted', zero_division=0)

        print(f"Epoch: {epoch + 1}/{num_epoch}  Train Loss: {train_loss:.4f}")
        print(f"Test Loss: {test_loss:.4f}")
        print(f"Accuracy: {acc:.4f}")
        print(f"Precision: {prec:.4f}")
        print(f"Recall: {rec:.4f}")
        print(f"F1-score: {f1:.4f}\n")

        if patience != 0:
            if acc > best_acc:
                best_acc = acc
                counter = 0
                best_epoch = epoch
                if save_path:
                    torch.save(model.state_dict(), f"{save_path}/best_model_{best_epoch}e.pth")
            else:
                counter += 1
                # report against this call's own epoch budget, not a notebook-level global
                print(f"No improvement in {epoch+1}/{num_epoch} epoch!")

            if counter >= patience:
                print(f"Stopping early after {epoch + 1} epoch")
                break

    if save_path:
        torch.save(model.state_dict(), f"{save_path}/model_{num_epoch}e.pth")
        # only announce a save when one actually happened
        print("Đã lưu mô hình thành công!")
    if all_labels:
        print("\nBáo cáo chi tiết:")
        print(classification_report(all_labels, all_preds, zero_division=0))
    return model

In [4]:
# Build the datasets and dataloaders
data_dir = "../input/human-face-emotions/"

# Training pipeline: light augmentation, then resize + normalisation
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Evaluation pipeline: same resize + normalisation, no augmentation
transform_test = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

# Un-transformed view of the folder: used to compute the 80/20 split indices
data = datasets.ImageFolder(root=data_dir+"Data")
train_number = int(len(data) * 0.8)
test_number = len(data) - train_number
# Seed the split so train/test membership is identical after every kernel restart
split_generator = torch.Generator().manual_seed(42)
train_subset, test_subset = random_split(data, [train_number, test_number], generator=split_generator)
# Re-open the same folder once per transform and restrict each view to its split indices
data_train = Subset(datasets.ImageFolder(root=data_dir+"Data", transform=transform_train), train_subset.indices)
data_test = Subset(datasets.ImageFolder(root=data_dir+"Data", transform=transform_test), test_subset.indices)

# Alternative (not used here): if the dataset provides separate "train"/"test" folders,
# build one ImageFolder per folder with the matching transform instead of splitting.

dataloader_train = DataLoader(data_train, batch_size=16, shuffle=True, num_workers=2)
dataloader_test = DataLoader(data_test, batch_size=16, shuffle=False, num_workers=2)
dataloader = [dataloader_train, dataloader_test]

print("Dataloader: ",dataloader)
# Sanity check: peek at a single training batch
images, labels = next(iter(dataloader[0]))
print(images.shape)
print(labels)

Dataloader:  [<torch.utils.data.dataloader.DataLoader object at 0x7baee1296990>, <torch.utils.data.dataloader.DataLoader object at 0x7baee33bae50>]
torch.Size([16, 3, 224, 224])
tensor([4, 2, 3, 1, 0, 3, 3, 1, 2, 1, 1, 2, 0, 2, 2, 1])


In [5]:
# Show the class names of the ImageFolder dataset and their integer label mapping
class_names, class_to_idx = data.classes, data.class_to_idx
print("Classes:", class_names)
print("Class to idx:", class_to_idx)

Classes: ['Angry', 'Fear', 'Happy', 'Sad', 'Suprise']
Class to idx: {'Angry': 0, 'Fear': 1, 'Happy': 2, 'Sad': 3, 'Suprise': 4}


In [6]:
# Initialise a MobileNetV2 model with pretrained weights
model = models.mobilenet_v2(pretrained=True)
# One output per class found in the dataset folder instead of a hard-coded count
num_classes = len(data.classes)
# Freeze the feature extractor; only the new classifier layer is trainable at first
for param in model.features.parameters():
    param.requires_grad = False
# Replace the final linear layer so its output size matches num_classes
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 106MB/s] 


In [7]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
use_cuda = torch.cuda.is_available()
device = torch.device('cuda') if use_cuda else torch.device('cpu')
print("Device is used: ", device)
model.to(device)

# Loss function and notebook-level training settings
criterion = nn.CrossEntropyLoss()
EPOCH, patience = 100, 7

Device is used:  cuda


In [8]:
print("--- TRAINING PHASE 1: Last Block ---")
# Phase 1 optimises the classifier parameters only, for 10 epochs
phase1_param_groups = [{'params': model.classifier.parameters(), 'lr': 0.001}]
optimizer = optim.Adam(phase1_param_groups)
model = training_model(
    model,
    device,
    dataloader,
    criterion,
    optimizer,
    num_epoch=10,
)
print("Training Phase 1 success!")

--- TRAINING PHASE 1: Last Block ---
Start Training...
Epoch: 1/10  Train Loss: 1.4173
Test Loss: 1.3411
Accuracy: 0.4638
Precision: 0.4808
Recall: 0.4638
F1-score: 0.4158

Epoch: 2/10  Train Loss: 1.3928
Test Loss: 1.3745
Accuracy: 0.4477
Precision: 0.4596
Recall: 0.4477
F1-score: 0.3771

Epoch: 3/10  Train Loss: 1.3986
Test Loss: 1.2771
Accuracy: 0.4779
Precision: 0.4579
Recall: 0.4779
F1-score: 0.4561

Epoch: 4/10  Train Loss: 1.3955
Test Loss: 1.3675
Accuracy: 0.4243
Precision: 0.4762
Recall: 0.4243
F1-score: 0.4082

Epoch: 5/10  Train Loss: 1.3997
Test Loss: 1.3590
Accuracy: 0.4610
Precision: 0.4759
Recall: 0.4610
F1-score: 0.4125

Epoch: 6/10  Train Loss: 1.3952
Test Loss: 1.3319
Accuracy: 0.4538
Precision: 0.4923
Recall: 0.4538
F1-score: 0.4412

Epoch: 7/10  Train Loss: 1.3959
Test Loss: 1.2759
Accuracy: 0.4838
Precision: 0.4694
Recall: 0.4838
F1-score: 0.4474

Epoch: 8/10  Train Loss: 1.3981
Test Loss: 1.3377
Accuracy: 0.4417
Precision: 0.4852
Recall: 0.4417
F1-score: 0.4455

E

In [9]:
# Unfreeze the last feature blocks of MobileNetV2 (indices 17 and 18).
# Compare the leading index of the parameter name rather than using a substring
# test, so an unrelated "17"/"18" elsewhere in a name can never match.
for name, param in model.features.named_parameters():
    if name.split(".", 1)[0] in ("17", "18"):
        param.requires_grad = True
print("Unfreeze Block 17 and 18 success!")
print("--- TRAINING PHASE 2: Some Last Block ---")
# Give the optimizer only the backbone parameters that are actually trainable;
# the still-frozen ones would never receive a gradient.
trainable_feature_params = [p for p in model.features.parameters() if p.requires_grad]
optimizer_p2 = optim.Adam([
    {'params': trainable_feature_params, 'lr': 0.0001},
    {'params': model.classifier.parameters(), 'lr': 0.0005}
])
model = training_model(model, device, dataloader, criterion, optimizer_p2, num_epoch=20)
print("Training Phase 2 success!")

Unfreeze Block 17 and 18 success!
--- TRAINING PHASE 2: Some Last Block ---
Start Training...
Epoch: 1/20  Train Loss: 1.2325
Test Loss: 1.2109
Accuracy: 0.5061
Precision: 0.6242
Recall: 0.5061
F1-score: 0.5137

Epoch: 2/20  Train Loss: 1.1040
Test Loss: 1.0696
Accuracy: 0.5747
Precision: 0.6252
Recall: 0.5747
F1-score: 0.5610

Epoch: 3/20  Train Loss: 1.0544
Test Loss: 0.9696
Accuracy: 0.6156
Precision: 0.6165
Recall: 0.6156
F1-score: 0.6139

Epoch: 4/20  Train Loss: 1.0169
Test Loss: 0.9826
Accuracy: 0.6270
Precision: 0.6506
Recall: 0.6270
F1-score: 0.5976

Epoch: 5/20  Train Loss: 0.9905
Test Loss: 0.9262
Accuracy: 0.6286
Precision: 0.6306
Recall: 0.6286
F1-score: 0.6290

Epoch: 6/20  Train Loss: 0.9644
Test Loss: 0.9514
Accuracy: 0.6338
Precision: 0.6663
Recall: 0.6338
F1-score: 0.6211

Epoch: 7/20  Train Loss: 0.9421
Test Loss: 0.9261
Accuracy: 0.6458
Precision: 0.6526
Recall: 0.6458
F1-score: 0.6195

Epoch: 8/20  Train Loss: 0.9213
Test Loss: 0.9514
Accuracy: 0.6357
Precision: 0.

In [None]:
print("--- TRAINING PHASE 3: Entire Model ---")
# Make every feature-extractor parameter trainable again
for backbone_param in model.features.parameters():
    backbone_param.requires_grad_(True)
print("Unfreeze Model success!")
# Separate learning rates for the feature extractor and the classifier
phase3_param_groups = [
    {'params': model.features.parameters(), 'lr': 0.00005},  # increased from 0.00001
    {'params': model.classifier.parameters(), 'lr': 0.0002},
]
optimizer_p3 = optim.Adam(phase3_param_groups)
model = training_model(
    model,
    device,
    dataloader,
    criterion,
    optimizer_p3,
    num_epoch=15,
    save_path="emotion-recognition-model",
)
print("Training Phase 3 success!")

--- TRAINING PHASE 3: Entire Model ---
Unfreeze Model success!
Start Training...
Epoch: 1/15  Train Loss: 0.7079
Test Loss: 0.6789
Accuracy: 0.7416
Precision: 0.7463
Recall: 0.7416
F1-score: 0.7380

Epoch: 2/15  Train Loss: 0.6161
Test Loss: 0.6221
Accuracy: 0.7675
Precision: 0.7662
Recall: 0.7675
F1-score: 0.7653

Epoch: 3/15  Train Loss: 0.5478
Test Loss: 0.5610
Accuracy: 0.7904
Precision: 0.7880
Recall: 0.7904
F1-score: 0.7889

Epoch: 4/15  Train Loss: 0.4853
Test Loss: 0.5418
Accuracy: 0.8050
Precision: 0.8082
Recall: 0.8050
F1-score: 0.8063

Epoch: 5/15  Train Loss: 0.4278
Test Loss: 0.5077
Accuracy: 0.8182
Precision: 0.8208
Recall: 0.8182
F1-score: 0.8186

Epoch: 6/15  Train Loss: 0.3828
Test Loss: 0.4685
Accuracy: 0.8377
Precision: 0.8375
Recall: 0.8377
F1-score: 0.8375

Epoch: 7/15  Train Loss: 0.3398
Test Loss: 0.4397
Accuracy: 0.8547
Precision: 0.8541
Recall: 0.8547
F1-score: 0.8531

Epoch: 8/15  Train Loss: 0.2994
Test Loss: 0.4147
Accuracy: 0.8661
Precision: 0.8663
Recall: 