In [1]:
import random
import sys

import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import transforms

sys.path.append("../")
from src import create_hf_train_dataset, Mixup, FashionNetDataset, Query2LabelNet, MultiLabelCrossEntropyLoss, Params, get_label_class_weights

In [2]:
# Locations of the dataset and the training configuration, relative to this notebook.
data_dir = "../data"
config_path = "../configs/train_config.json"

# Seed every RNG up front so that Restart Kernel -> Run All reproduces the same
# shuffled batch, random augmentations, mixup draws and model initialisation.
# Python, NumPy and PyTorch are all seeded because the helpers imported from
# `src` are not visible here and may draw from any of the three.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED);  # trailing ';' hides the returned Generator repr

In [3]:
# Load the training configuration from `config_path`. The resulting object is
# read with attribute-style lookups in the cells below
# (e.g. `train_config.TRAINING.BATCH_SIZE.TRAIN`).
train_config = Params(config_path)

In [4]:
# Build the dataset splits from the data directory, then bind the "train" and
# "val" splits to their own names for use in later cells.
hf_dataset_dict = create_hf_train_dataset(data_dir)
hf_train_dataset, hf_val_dataset = (
    hf_dataset_dict[split_name] for split_name in ("train", "val")
)


In [5]:
# Training-time preprocessing pipeline, driven entirely by the augmentation config.
aug_config = train_config.DATA_AUGMENTATION

# Crop to the target resolution: random crop when enabled, centre crop otherwise.
crop_cls = transforms.RandomCrop if aug_config.RANDOM_CROP else transforms.CenterCrop

transform_steps = [
    transforms.Resize(aug_config.RESIZE_RESOLUTION),
    crop_cls(aug_config.TARGET_RESOLUTION),
]
# Only add the flip when it is enabled. The previous identity
# `transforms.Lambda(lambda x: x)` placeholder did nothing to the image but
# wrapped an unpicklable lambda, which breaks DataLoader workers on platforms
# that start them with "spawn".
if aug_config.RANDOM_HORIZONTAL_FLIP:
    transform_steps.append(transforms.RandomHorizontalFlip())
transform_steps += [
    transforms.ToTensor(),
    transforms.Normalize(aug_config.NORM_MEAN, aug_config.NORM_STD),
]
train_transforms = transforms.Compose(transform_steps)

# Reuse the train split bound in the previous cell instead of re-indexing the dict.
train_dataset = FashionNetDataset(hf_train_dataset, train_transforms)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=train_config.TRAINING.BATCH_SIZE.TRAIN,
    shuffle=True,
)

In [6]:
# Draw a single batch from the training loader. `images` and `labels` are
# reused by both the hard-target and the soft-target sections below.
images, labels = next(iter(train_dataloader))

# Hard Targets

**Model Output**

In [7]:
# Instantiate the network from the training configuration.
model = Query2LabelNet(train_config)

In [8]:
# Forward pass on the sampled batch. Gradients are tracked (no
# `torch.no_grad()`), so the loss computed from this output carries a grad_fn.
model_pred = model(images)

In [9]:
# Show the raw model output: a list of logits tensors.
# NOTE(review): this dumps every logit for the whole batch; displaying shapes
# or a small slice would keep the notebook output readable.
model_pred

[tensor([[-1.6413e-01,  6.4673e-01,  9.5619e-01,  1.2714e+00,  4.6149e-01,
          -3.5299e-01,  1.1804e-01],
         [ 1.5713e-01,  2.6187e-01, -2.5281e-01, -1.9114e-01,  6.6781e-02,
          -1.2844e+00, -5.0515e-01],
         [-1.2203e-01, -5.0612e-01,  4.6533e-01,  1.5022e+00,  2.7885e-01,
          -1.0527e+00, -2.5585e-01],
         [ 1.5568e-02,  1.4897e-01,  3.6052e-01,  6.1510e-01,  2.7876e-01,
          -2.5397e-02, -4.5207e-01],
         [-5.7269e-01, -3.7186e-01,  5.7875e-01, -2.9305e-01,  5.5273e-01,
          -1.6974e-01, -1.0649e-01],
         [-5.6901e-01,  1.6599e-01,  4.2947e-01, -1.2641e-01,  1.4150e+00,
          -3.1884e-01,  4.6968e-01],
         [-1.4472e-01,  2.0786e-01,  1.2251e+00,  1.2905e-01, -2.0862e-03,
          -7.4313e-01, -7.3463e-02],
         [-3.3602e-01,  3.2396e-01, -2.3279e-02,  7.3989e-02, -9.1709e-01,
          -6.4786e-01, -5.9642e-01],
         [-1.4268e-02, -3.3245e-01,  5.4817e-01, -3.6096e-01,  1.3523e+00,
          -1.7644e-01,  4.174

In [10]:
# Report the shape of each logits tensor in the model output, one line per label.
for label_idx, label_logits in enumerate(model_pred):
    print(f"Shape of the prediction for label {label_idx}: {label_logits.shape}")

Shape of the prediction for label 0: torch.Size([64, 7])
Shape of the prediction for label 1: torch.Size([64, 3])
Shape of the prediction for label 2: torch.Size([64, 3])
Shape of the prediction for label 3: torch.Size([64, 4])
Shape of the prediction for label 4: torch.Size([64, 6])
Shape of the prediction for label 5: torch.Size([64, 3])


**Criterion Output**

In [11]:
# Compute class weights from the training split's "label" column; they are
# passed to both loss criteria below.
# NOTE(review): the weighting scheme lives in `src` — confirm how it is derived.
label_class_weights = get_label_class_weights(hf_train_dataset["label"])

In [12]:
# Criterion for hard targets: built with the class weights only, leaving
# `is_soft_target` at its default (presumably False — confirm in `src`).
hard_criterion = MultiLabelCrossEntropyLoss(label_class_weights)

In [13]:
# Loss of the model output against the batch's original labels.
hard_loss = hard_criterion(model_pred, labels)

In [14]:
# Display the hard-target loss value.
hard_loss

tensor(8.9595, grad_fn=<AddBackward0>)

# Soft Targets

In [15]:
# Gather the mixup/cutmix settings from the config, then build the augmentation
# callable that is applied to an (images, labels) batch in the next cell.
augmentation_config = train_config.DATA_AUGMENTATION
mixup_settings = {
    "mixup_alpha": augmentation_config.MIXUP,
    "cutmix_alpha": augmentation_config.CUTMIX,
    "cutmix_minmax": augmentation_config.CUTMIX_MINMAX,
    "prob": augmentation_config.MIXUP_PROB,
    "switch_prob": augmentation_config.MIXUP_SWITCH_PROB,
    "mode": augmentation_config.MIXUP_MODE,
    "label_smoothing": train_config.MODEL.LABEL_SMOOTHING,
}
mixup_fn = Mixup(train_config.MODEL.NUM_LABELS_LIST, **mixup_settings)

In [16]:
# Apply the mixup/cutmix augmentation to the sampled batch. It returns three
# values: the mixed images, the matching mixed targets (fed to the soft-target
# criterion below), and a third value bound to `true_labels`, which the visible
# cells do not use afterwards.
mixed_images, mixed_targets, true_labels = mixup_fn(images, labels)

**Model Output**

In [17]:
# NOTE(review): this rebinds `model` to a freshly constructed network, so the
# soft-target section runs on a different instance than the hard-target
# section above — confirm this is intended rather than reusing the first model.
model = Query2LabelNet(train_config)

In [18]:
# Forward pass on the mixed images produced by `mixup_fn`.
model_pred_mixed = model(mixed_images)

In [19]:
# Show the raw model output for the mixed batch: a list of logits tensors.
# NOTE(review): this dumps every logit for the whole batch; displaying shapes
# or a small slice would keep the notebook output readable.
model_pred_mixed

[tensor([[-0.0937,  0.1053, -0.4435, -0.0421,  0.5142,  0.7421, -0.5726],
         [ 0.2374,  0.0863, -0.9716, -0.6580, -0.6000, -1.4098,  0.5549],
         [-0.4004,  0.5705,  0.5833, -0.8481, -0.0698,  0.8609,  0.6741],
         [-1.0260, -0.4163,  0.7395, -0.5824,  0.1659,  0.2965, -0.3205],
         [ 0.4431, -0.5575, -0.7275, -0.0183,  0.1333, -0.2055, -0.6880],
         [-0.2424,  0.1955, -1.0947, -0.2130,  0.1788,  0.1222, -0.3365],
         [-0.0724,  0.3868, -0.4379, -0.5279,  0.2831,  1.1533,  0.4493],
         [ 0.5045, -0.1955, -0.1163,  0.6540, -0.5934,  0.0933, -0.2673],
         [-0.6875, -0.4587, -0.3039, -0.7907,  0.5372,  0.7148,  0.0524],
         [-0.5374,  1.0483, -0.1508, -0.9006, -0.0987,  0.3185,  0.6941],
         [ 0.1604, -0.0169,  0.6332, -0.7979, -0.0042,  0.4329,  0.4438],
         [-0.0443, -0.1739,  0.7485, -0.4913, -0.3044,  0.6663,  0.5491],
         [-0.0816, -0.6072, -0.7058, -0.0978, -0.1534,  1.2808,  0.1363],
         [ 0.1192, -0.4341,  0.4148, -

In [20]:
# Report the shape of each logits tensor for the mixed batch, one line per label.
for label_idx, label_logits in enumerate(model_pred_mixed):
    print(f"Shape of the prediction for label {label_idx}: {label_logits.shape}")

Shape of the prediction for label 0: torch.Size([64, 7])
Shape of the prediction for label 1: torch.Size([64, 3])
Shape of the prediction for label 2: torch.Size([64, 3])
Shape of the prediction for label 3: torch.Size([64, 4])
Shape of the prediction for label 4: torch.Size([64, 6])
Shape of the prediction for label 5: torch.Size([64, 3])


**Criterion Output**

In [21]:
# Criterion for soft targets: same class weights as the hard-target criterion,
# with `is_soft_target=True` so it accepts the mixed targets from `mixup_fn`.
soft_criterion = MultiLabelCrossEntropyLoss(label_class_weights, is_soft_target=True)

In [22]:
# Loss of the mixed-batch output against the mixed (soft) targets.
soft_loss = soft_criterion(model_pred_mixed, mixed_targets)

In [23]:
# Display the soft-target loss value.
soft_loss

tensor(3.0716, grad_fn=<AddBackward0>)