In [1]:
import os

import torch
from torchinfo import summary
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

from going_modular.dataloader.magface import create_magface_dataloader
from going_modular.model.MagFaceRecognition import MagFaceRecognition
from going_modular.train_eval.train import fit
from going_modular.loss.MagLoss import MagLoss
from going_modular.utils.MultiMetricEarlyStopping import MultiMetricEarlyStopping
from going_modular.utils.ModelCheckPoint import ModelCheckpoint

device = "cuda" if torch.cuda.is_available() else "cpu"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [2]:
CONFIGURATION = {
    # Thư mục
    'type': 'albedo',
    'train_dir': './Dataset/Albedo/train',
    'test_dir': './Dataset/Albedo/test',
    
    # Cấu hình train
    'backbone': 'iresnet50',
    'epochs': 2000,
    'num_workers': 4,
    'batch_size': 16,
    'image_size': 224,
    'num_class': len(os.listdir('./Dataset/Albedo/train')),
    'embedding_size': 512,
    
    'learning_rate': 0.1,
    'weight_decay': 5e-4,
    'momentum': 0.9,
    'alpha': 0.9,
    
    # Hàm m(ai) giúp thay đổi ai từ 0.25 đến 1.6
    'scale': 64,
    'lambda_g': 20,
    'l_margin': 0.45, 
    'u_margin': 0.8,
    'l_a': 10, 
    'u_a': 110,
}

In [3]:
# Đặt seed toàn cục
seed = 42
torch.manual_seed(seed)

<torch._C.Generator at 0x7f2c70181130>

In [4]:
train_dataloader, val_dataloader = create_magface_dataloader(CONFIGURATION, None, None)

In [5]:
dataiter = iter(train_dataloader)
images, labels = next(dataiter)
print(f"Labels: {images.shape}")

Labels: torch.Size([16, 3, 224, 224])


In [6]:
# summary(
#     model=MagFaceRecognition(CONFIGURATION), 
#     verbose=0, 
#     col_width=20, 
#     row_settings=["var_names"],
#     col_names=["input_size", 'output_size', "num_params", "trainable"],
#     input_size=(1,3,224,224)
# )

In [9]:
model = MagFaceRecognition(CONFIGURATION).to(device)
criterion = MagLoss(conf = CONFIGURATION)

# optimizer = torch.optim.SGD(
#         filter(lambda p: p.requires_grad, model.parameters()),
#         CONFIGURATION['learning_rate'],
#         momentum=CONFIGURATION['momentum'],
#         weight_decay=CONFIGURATION['weight_decay']
# )
optimizer = torch.optim.Adam(model.parameters(), lr=CONFIGURATION['learning_rate'])
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=40, T_mult=1, eta_min=1e-6)
checkpoint_path = os.path.abspath('checkpoint/magface/albedo/' + CONFIGURATION['backbone'] + '/models/checkpoint.pth')
modle_checkpoint = ModelCheckpoint(filepath=checkpoint_path, verbose=1)
earlystop_dir = os.path.abspath('checkpoint/magface/albedo/' + CONFIGURATION['backbone'] + '/models')
early_stopping = MultiMetricEarlyStopping(
    monitor_keys=['val_euclidean_accuracy', 'val_cosine_accuracy', 'val_auc_euclidean', 'val_auc_cosine'],
    patience=40,
    mode='max',
    verbose=1,
    save_dir=earlystop_dir,
    start_from_epoch=60
)            

In [10]:
fit(CONFIGURATION, 0, model, device, train_dataloader, val_dataloader, criterion, optimizer, scheduler, early_stopping, modle_checkpoint)

Epoch 1:
	train: loss 3.342 | loss id   3.32 | top_1_acc   0.00 | top_5_acc   0.03 | acc_eu: 0.491 | acc_cos: 0.208 | auc_eu: 0.578 | auc_cos: 0.531
	val: acc_eu: 0.824 | acc_cos: 0.177 | auc_eu: 0.597 | auc_cos: 0.504
[36m	Saving model to /media/vohoang/WorkSpace/ubuntu/projects/in-process/Bachelor-s-Project/checkpoint/magface/albedo/iresnet50/models/checkpoint.pth[0m
Epoch 2:
	train: loss 1.156 | loss id   1.13 | top_1_acc   0.01 | top_5_acc   0.02 | acc_eu: 0.504 | acc_cos: 0.489 | auc_eu: 0.572 | auc_cos: 0.555
	val: acc_eu: 0.687 | acc_cos: 0.500 | auc_eu: 0.585 | auc_cos: 0.556
[36m	Saving model to /media/vohoang/WorkSpace/ubuntu/projects/in-process/Bachelor-s-Project/checkpoint/magface/albedo/iresnet50/models/checkpoint.pth[0m
Epoch 3:
	train: loss 0.621 | loss id   0.60 | top_1_acc   0.00 | top_5_acc   0.04 | acc_eu: 0.533 | acc_cos: 0.335 | auc_eu: 0.583 | auc_cos: 0.606
	val: acc_eu: 0.593 | acc_cos: 0.457 | auc_eu: 0.606 | auc_cos: 0.637
[36m	Saving model to /media/voho

KeyboardInterrupt: 

In [12]:
%load_ext tensorboard
%tensorboard --logdir checkpoint/magface/albedo/iresnet50/logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 126216), started 0:00:04 ago. (Use '!kill 126216' to kill it.)

In [14]:
print('TB: 1.15211267606p/epoch')

TB: 1.15211267606p/epoch
