In [1]:
import os
import random

import numpy as np
import torch
from torchinfo import summary
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

from going_modular.dataloader.magface import create_magface_dataloader
from going_modular.model.MagFaceRecognition import MagFaceRecognition
from going_modular.train_eval.train import fit
from going_modular.loss.MagLoss import MagLoss
from going_modular.utils.MultiMetricEarlyStopping import MultiMetricEarlyStopping
from going_modular.utils.ModelCheckPoint import ModelCheckpoint

# Configure the CUDA caching allocator before anything queries or initialises
# CUDA, so the setting is already in place when the allocator first reads it.
# NOTE: this overwrites any PYTORCH_CUDA_ALLOC_CONF value exported in the shell.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [2]:
# Dataset locations are named once so that every entry derived from them
# (the class count below) cannot drift from the directory actually used.
TRAIN_DIR = './Dataset/Albedo/train'
TEST_DIR = './Dataset/Albedo/test'

# Single configuration dict shared by the data loaders, model, loss and
# training loop created in the following cells.
CONFIGURATION = {
    # Directories
    'type': 'albedo',
    'train_dir': TRAIN_DIR,
    'test_dir': TEST_DIR,

    # Training configuration
    'backbone': 'iresnet18',
    'epochs': 2000,
    'num_workers': 4,
    'batch_size': 16,
    'image_size': 224,
    # Number of entries in the training directory (presumably one folder per
    # identity — TODO confirm).
    # NOTE(review): os.listdir also counts stray or hidden files, which would
    # inflate the class count.
    'num_class': len(os.listdir(TRAIN_DIR)),
    'embedding_size': 512,

    'learning_rate': 0.2,
    'weight_decay': 5e-4,
    'momentum': 0.9,
    'alpha': 0.9,

    # Original note: the function m(a_i) varies a_i from 0.25 to 1.6.
    # NOTE(review): that range does not match l_margin/u_margin (0.45 / 0.8)
    # below — confirm which values are intended.
    'scale': 64,
    'lambda_g': 20,
    'l_margin': 0.45,
    'u_margin': 0.8,
    'l_a': 10,
    'u_a': 110,
}

In [3]:
# Đặt seed toàn cục
# Set the global seed.
# torch.manual_seed only seeds PyTorch's generators; Python's `random` module
# and NumPy's global RNG are seeded as well so that any code drawing from them
# is reproducible too.
seed = 42
random.seed(seed)
np.random.seed(seed)
# Kept as the last expression so the cell still displays the returned
# torch Generator.
torch.manual_seed(seed)

<torch._C.Generator at 0x724d660d1130>

In [4]:
# Build the two data loaders (unpacked as train and validation) from the
# shared configuration dict.
# NOTE(review): the two None arguments are positional; presumably optional
# train/validation transforms — confirm against create_magface_dataloader.
train_dataloader, val_dataloader = create_magface_dataloader(CONFIGURATION, None, None)

In [5]:
# Model and loss are both built from the shared configuration dict.
model = MagFaceRecognition(CONFIGURATION).to(device)
criterion = MagLoss(conf=CONFIGURATION)

# NOTE(review): CONFIGURATION also defines 'momentum' and 'weight_decay', which
# are not passed to the optimizer, and a learning rate of 0.2 is unusually high
# for Adam — confirm whether SGD (or a smaller Adam learning rate) was intended.
optimizer = torch.optim.Adam(model.parameters(), lr=CONFIGURATION['learning_rate'])
# Cosine annealing that restarts every 40 scheduler steps (T_mult=1 keeps the
# period fixed) and anneals the learning rate down to 1e-6.
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=40, T_mult=1, eta_min=1e-6)

# One models directory, derived from the configuration, is shared by the
# checkpoint writer and the early-stopping callback.
models_dir = os.path.abspath(
    os.path.join('checkpoint', 'magface', CONFIGURATION['type'], CONFIGURATION['backbone'], 'models')
)
checkpoint_path = os.path.join(models_dir, 'checkpoint.pth')
earlystop_dir = models_dir

model_checkpoint = ModelCheckpoint(filepath=checkpoint_path, verbose=1)
# Backward-compatible alias: the training cell refers to the original
# (misspelled) name.
modle_checkpoint = model_checkpoint

# Early stopping watches four validation metrics, all of which should increase.
early_stopping = MultiMetricEarlyStopping(
    monitor_keys=['val_euclidean_accuracy', 'val_cosine_accuracy', 'val_auc_euclidean', 'val_auc_cosine'],
    patience=40,
    mode='max',
    verbose=1,
    save_dir=earlystop_dir,
    start_from_epoch=40,
)

In [6]:
# Run the training loop with the objects built in the previous cells.
# NOTE(review): the literal 0 is passed positionally; presumably the starting
# epoch — confirm against fit's signature.
fit(CONFIGURATION, 0, model, device, train_dataloader, val_dataloader, criterion, optimizer, scheduler, early_stopping, modle_checkpoint)

Epoch 1:
	train: loss 3.721 | loss id   3.70 | top_1_acc 0.0067 | top_5_acc 0.0209 | acc_eu: 0.730 | acc_cos: 0.572 | auc_eu: 0.620 | auc_cos: 0.681
	val: acc_eu: 0.665 | acc_cos: 0.671 | auc_eu: 0.646 | auc_cos: 0.702
[36m	Saving model and optimizer state to /media/vohoang/WorkSpace/ubuntu/projects/in-process/Bachelor-s-Project/checkpoint/magface/albedo/iresnet18/models/checkpoint.pth[0m
Epoch 2:
	train: loss 2.081 | loss id   2.06 | top_1_acc 0.0105 | top_5_acc 0.0351 | acc_eu: 0.707 | acc_cos: 0.578 | auc_eu: 0.642 | auc_cos: 0.656
	val: acc_eu: 0.569 | acc_cos: 0.611 | auc_eu: 0.705 | auc_cos: 0.708
[36m	Saving model and optimizer state to /media/vohoang/WorkSpace/ubuntu/projects/in-process/Bachelor-s-Project/checkpoint/magface/albedo/iresnet18/models/checkpoint.pth[0m
Epoch 3:
	train: loss 0.677 | loss id   0.65 | top_1_acc 0.0097 | top_5_acc 0.0538 | acc_eu: 0.579 | acc_cos: 0.550 | auc_eu: 0.662 | auc_cos: 0.671
	val: acc_eu: 0.578 | acc_cos: 0.520 | auc_eu: 0.719 | auc_cos:

In [8]:
# Load the TensorBoard notebook extension and open it on the log directory.
# NOTE(review): the path is hard-coded for the albedo / iresnet18 run
# (presumably where fit writes its logs); update it if 'type' or 'backbone'
# change in CONFIGURATION.
%load_ext tensorboard
%tensorboard --logdir checkpoint/magface/albedo/iresnet18/logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 171244), started 0:00:04 ago. (Use '!kill 171244' to kill it.)