In [1]:
import warnings
warnings.filterwarnings("ignore")
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
import albumentations as A
import os
import torch

from going_modular.dataloader.multitask import create_concatv3_multitask_datafetcher
from going_modular.model.MTLFaceRecognition import MTLFaceRecognition
from going_modular.model.ConcatMTLFaceRecognition import ConcatMTLFaceRecognitionV3
from going_modular.loss.ConcatMultiTaskLoss import ConcatMultiTaskLoss
from going_modular.train_eval.concat_train import fit
from going_modular.utils.transforms import RandomResizedCropRect, GaussianNoise
from going_modular.utils.MultiMetricEarlyStopping import MultiMetricEarlyStopping
from going_modular.utils.ModelCheckPoint import ModelCheckpoint

device = "cuda" if torch.cuda.is_available() else "cpu"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Đặt seed toàn cục
seed = 42
torch.manual_seed(seed)

CONFIGURATION = {
    'type': 'concat3',
    
    # Thư mục
    'dataset_dir': './Dataset',
    'checkpoint_dir': './checkpoint/multi/',
    'checkpoint_1': './checkpoint/multi/normalmap/models/checkpoint.pth',
    'checkpoint_2': './checkpoint/multi/albedo/models/checkpoint.pth',
    'checkpoint_3': './checkpoint/multi/depthmap/models/checkpoint.pth',
        
    # Cấu hình train
    'device': device,
    'epochs': 119,
    'num_workers': 4,
    'batch_size': 16,
    'image_size': 256,
    'base_lr': 1e-4,
    
    # Cấu hình network
    'backbone': 'miresnet18',
    'embedding_size': 512,
    'num_classes': None,
    'loss_gender_weight': 30,
    'loss_emotion_weight': 5,
    'loss_pose_weight': 30,
    'loss_spectacles_weight': 5,
    'loss_facial_hair_weight': 5,
}

CONFIGURATION['num_classes'] = len(os.listdir('./Dataset/Albedo/train'))

train_transform = A.Compose([
    RandomResizedCropRect(256),
    GaussianNoise(),
], additional_targets={
    'albedo': 'image',
    'depthmap': 'image'
})


test_transform = A.Compose([
    A.Resize(height=CONFIGURATION['image_size'], width=CONFIGURATION['image_size'])
], additional_targets={
    'albedo': 'image',
    'depthmap': 'image'
})

train_dataloader, test_dataloader = create_concatv3_multitask_datafetcher(CONFIGURATION, train_transform, test_transform)

2025-01-15 23:06:16.266762: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-15 23:06:16.275914: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1736957176.287150   91094 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1736957176.290292   91094 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-15 23:06:16.302912: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [3]:
checkpoint_1 = torch.load(CONFIGURATION['checkpoint_1'], map_location=torch.device('cpu'))
mtl_normalmap = MTLFaceRecognition(CONFIGURATION['backbone'], CONFIGURATION['num_classes'])
mtl_normalmap.load_state_dict(checkpoint_1['model_state_dict'])

checkpoint_2 = torch.load(CONFIGURATION['checkpoint_2'], map_location=torch.device('cpu'))
mtl_albedo = MTLFaceRecognition(CONFIGURATION['backbone'], CONFIGURATION['num_classes'])
mtl_albedo.load_state_dict(checkpoint_2['model_state_dict'])

checkpoint_3 = torch.load(CONFIGURATION['checkpoint_3'], map_location=torch.device('cpu'))
mtl_depthmap = MTLFaceRecognition(CONFIGURATION['backbone'], CONFIGURATION['num_classes'])
mtl_depthmap.load_state_dict(checkpoint_3['model_state_dict'])

model = ConcatMTLFaceRecognitionV3(mtl_normalmap, mtl_albedo, mtl_depthmap, CONFIGURATION['num_classes'])

for param in model.mtl_normalmap.parameters():
    param.requires_grad = False
    
for param in model.mtl_albedo.parameters():
    param.requires_grad = False

for param in model.mtl_depthmap.parameters():
    param.requires_grad = False

In [4]:
criterion = ConcatMultiTaskLoss(os.path.join(CONFIGURATION['dataset_dir'], 'train_set.csv'), CONFIGURATION)

In [5]:
optimizer = Adam(model.parameters(), lr=CONFIGURATION['base_lr'])
# Khởi tạo scheduler
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=40, T_mult=1, eta_min=1e-6)

earlystop_dir = os.path.abspath(CONFIGURATION['checkpoint_dir'] + CONFIGURATION['type'] + '/models')

early_stopping = MultiMetricEarlyStopping(
    monitor_keys=['cosine_auc', 'euclidean_auc'],
    patience=1000,
    mode='max',
    verbose=0,
    save_dir=earlystop_dir,
    start_from_epoch=0
)      
checkpoint_path = os.path.abspath(CONFIGURATION['checkpoint_dir'] + CONFIGURATION['type'] + '/models/checkpoint.pth')
modle_checkpoint = ModelCheckpoint(filepath=checkpoint_path, verbose=1)

In [6]:
fit(
    conf=CONFIGURATION,
    start_epoch=0,
    model=model,
    train_dataloader=train_dataloader, 
    test_dataloader=test_dataloader, 
    criterion=criterion,
    optimizer=optimizer,    
    scheduler=scheduler, 
    early_stopping=early_stopping,
    model_checkpoint=modle_checkpoint
)

KeyboardInterrupt: 

In [4]:
checkpoint = torch.load(checkpoint_path)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
model.eval()

MTLFaceRecognition(
  (backbone): MIResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (prelu): PReLU(num_parameters=1)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (prelu): PReLU(num_parameters=1)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (downsample): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, mom

In [5]:
import cv2, os

os.environ["OPENCV_IO_ENABLE_OPENEXR"]="1"

emotion: sai task 0 toàn đoán thành task 2
occlusion + pose: tăng khả năng phân biệt giữa các lớp
spectales: tốt nhưng cần hơn
facial_hair + gender: quá tốt không cần chỉnh

Gender (1), Spectacles (0), Facial_Hair (1), Pose(0), Emotion(0)

In [None]:
image_path = './Dataset/Albedo/gallery/1003/2008-02-21_16-38-47.exr'

image = cv2.cvtColor(cv2.imread(image_path, cv2.IMREAD_UNCHANGED), cv2.COLOR_GRAY2RGB)

transfromed = test_transform(image=image)

X = torch.from_numpy(transfromed['image']).permute(2,0,1).unsqueeze(0).to(device)

x_id, x_gender, x_pose, x_emotion, x_facial_hair, x_spectacles = model.get_result(X)
x_gender = torch.softmax(x_gender, dim=1)
print(x_gender)
x_spectacles = torch.softmax(x_spectacles, dim=1)
print(x_spectacles)
x_facial_hair = torch.softmax(x_facial_hair, dim=1)
print(x_facial_hair)
x_pose = torch.softmax(x_pose, dim=1)
print(x_pose)
x_emotion = torch.softmax(x_emotion, dim=1)
print(x_emotion)

tensor([[0.3162, 0.6838]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.0028, 0.9972]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.0011, 0.9989]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.8657, 0.1343]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.8622, 0.1378]], device='cuda:0', grad_fn=<SoftmaxBackward0>)


Gender (1), Spectacles (0), Facial_Hair (1), Pose(2), Occlusion (2),Emotion(0)

In [9]:
image_path = './Dataset/Albedo/gallery/2186/2007-12-04_12-15-04.exr'

image = cv2.cvtColor(cv2.imread(image_path, cv2.IMREAD_UNCHANGED), cv2.COLOR_GRAY2RGB)

transfromed = test_transform(image=image)

X = torch.from_numpy(transfromed['image']).permute(2,0,1).unsqueeze(0).to(device)

x_id, x_gender, x_pose, x_emotion, x_facial_hair, x_occlusion, x_spectacles = model.get_result(X)
x_gender = torch.softmax(x_gender, dim=1)
print(x_gender)
x_spectacles = torch.softmax(x_spectacles, dim=1)
print(x_spectacles)
x_facial_hair = torch.softmax(x_facial_hair, dim=1)
print(x_facial_hair)
x_pose = torch.softmax(x_pose, dim=1)
print(x_pose)
x_occlusion = torch.softmax(x_occlusion, dim=1)
print(x_occlusion)
x_emotion = torch.softmax(x_emotion, dim=1)
print(x_emotion)

tensor([[0.2285, 0.7715]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.8189, 0.1811]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.0703, 0.9297]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.1183, 0.1410, 0.7407]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.0961, 0.5510, 0.3529]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.5269, 0.0341, 0.4390]], device='cuda:0', grad_fn=<SoftmaxBackward0>)


Gender (1), Spectacles (1), Facial_Hair (0), Pose(0), Occlusion (2),Emotion(0)

In [10]:
image_path = './Dataset/Albedo/gallery/1009/2008-02-18_08-50-39.exr'

image = cv2.cvtColor(cv2.imread(image_path, cv2.IMREAD_UNCHANGED), cv2.COLOR_GRAY2RGB)

transfromed = test_transform(image=image)

X = torch.from_numpy(transfromed['image']).permute(2,0,1).unsqueeze(0).to(device)

x_id, x_gender, x_pose, x_emotion, x_facial_hair, x_occlusion, x_spectacles = model.get_result(X)
x_gender = torch.softmax(x_gender, dim=1)
print(x_gender)
x_spectacles = torch.softmax(x_spectacles, dim=1)
print(x_spectacles)
x_facial_hair = torch.softmax(x_facial_hair, dim=1)
print(x_facial_hair)
x_pose = torch.softmax(x_pose, dim=1)
print(x_pose)
x_occlusion = torch.softmax(x_occlusion, dim=1)
print(x_occlusion)
x_emotion = torch.softmax(x_emotion, dim=1)
print(x_emotion)

tensor([[0.1420, 0.8580]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.0030, 0.9970]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.8690, 0.1310]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.4120, 0.4890, 0.0990]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.2312, 0.3471, 0.4217]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.6135, 0.0481, 0.3384]], device='cuda:0', grad_fn=<SoftmaxBackward0>)


Gender (1), Spectacles (0), Facial_Hair (0), Pose(0), Occlusion (1),Emotion(0)

In [11]:
image_path = './Dataset/Albedo/gallery/1038/2009-07-10_09-49-50.exr'

image = cv2.cvtColor(cv2.imread(image_path, cv2.IMREAD_UNCHANGED), cv2.COLOR_GRAY2RGB)

transfromed = test_transform(image=image)

X = torch.from_numpy(transfromed['image']).permute(2,0,1).unsqueeze(0).to(device)

x_id, x_gender, x_pose, x_emotion, x_facial_hair, x_occlusion, x_spectacles = model.get_result(X)
x_gender = torch.softmax(x_gender, dim=1)
print(x_gender)
x_spectacles = torch.softmax(x_spectacles, dim=1)
print(x_spectacles)
x_facial_hair = torch.softmax(x_facial_hair, dim=1)
print(x_facial_hair)
x_pose = torch.softmax(x_pose, dim=1)
print(x_pose)
x_occlusion = torch.softmax(x_occlusion, dim=1)
print(x_occlusion)
x_emotion = torch.softmax(x_emotion, dim=1)
print(x_emotion)

tensor([[0.1173, 0.8827]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.9733, 0.0267]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.7232, 0.2768]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.3992, 0.3661, 0.2347]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.1896, 0.4835, 0.3269]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
tensor([[0.4071, 0.0306, 0.5623]], device='cuda:0', grad_fn=<SoftmaxBackward0>)


        # 0: nhìn trực diện (2471), 1: nhìn nghiêng 1 chút (326), 2: lệch 30-45 độ (77)
        self.pose_loss = FocalLoss(alpha_weights={0: 0.0246, 1: 0.1863, 2: 0.7891}, gamma_weights={0: 1, 1: 0.5, 2: 0}, num_classes=3)
        # 0: tóc che mặt (13), 1: tay che mặt (46), 2: không bị che khuất (2615)
        self.occlusion_loss = FocalLoss(alpha_weights={0:0.7765, 1:0.2195, 2:0.0039}, gamma_weights={0: 0, 1: 0, 2: 1.5}, num_classes=3)
        # 0: nhìn trực diện (2209), 1: các cảm xúc khác (249), 2: tích cực (416)
        self.emotion_loss = FocalLoss(alpha_weights={0:0.0659, 1:0.5844, 2:0.3497}, gamma_weights={0: 0.5, 1: 0, 2: 0}, num_classes=3)

CONFIGURATION = {
    'type': 'albedo',
    
    # Thư mục
    'dataset_dir': './Dataset',
    'checkpoint_dir': './checkpoint/multi/',
    
    # Cấu hình train
    'device': device,
    'epochs': 39,
    'num_workers': 4,
    'batch_size': 16,
    'image_size': 256,
    'base_lr': 1e-4,
    
    # Cấu hình network
    'backbone': 'miresnet18',
    'embedding_size': 512,
    'num_classes': None,
    'loss_gender_weight': 10,
    'loss_da_gender_weight': 10,
    'loss_emotion_weight': 10,
    'loss_da_emotion_weight': 10,
    'loss_pose_weight': 20,
    'loss_da_pose_weight': 20,
    'loss_spectacles_weight': 5,
    'loss_da_spectacles_weight': 5,
    'loss_occlusion_weight': 20,
    'loss_da_occlusion_weight': 20,
    'loss_facial_hair_weight': 5,
    'loss_da_facial_hair_weight': 5,
}