In [1]:
# @title Download data
!gdown 1C-qdmgw_U4cM81ctaGkrCdi2VhuHstSk # train images
!gdown 10RCh2jwj0GBCTFtIO1YGcKxRwNhB_K5F # train json
!gdown 1JNO84TFvMDMTq2iFwXPKjFtiWoZoMFXb # test images
!gdown 1cOk47q8cOsujszoQHN9GizCfLDT7ZR_O # test json

Downloading...
From (original): https://drive.google.com/uc?id=1C-qdmgw_U4cM81ctaGkrCdi2VhuHstSk
From (redirected): https://drive.google.com/uc?id=1C-qdmgw_U4cM81ctaGkrCdi2VhuHstSk&confirm=t&uuid=8edf966e-fea9-43af-b33e-c96bea24d9ec
To: /content/training-images.zip
100% 870M/870M [00:29<00:00, 29.9MB/s]
Downloading...
From: https://drive.google.com/uc?id=10RCh2jwj0GBCTFtIO1YGcKxRwNhB_K5F
To: /content/vimmsd-train.json
100% 5.05M/5.05M [00:00<00:00, 78.6MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1JNO84TFvMDMTq2iFwXPKjFtiWoZoMFXb
From (redirected): https://drive.google.com/uc?id=1JNO84TFvMDMTq2iFwXPKjFtiWoZoMFXb&confirm=t&uuid=bd276285-67d5-40f9-a31a-cde3bdf45684
To: /content/public-test-images.zip
100% 82.5M/82.5M [00:01<00:00, 54.3MB/s]
Downloading...
From: https://drive.google.com/uc?id=1cOk47q8cOsujszoQHN9GizCfLDT7ZR_O
To: /content/vimmsd-public-test.json
100% 443k/443k [00:00<00:00, 138MB/s]


In [2]:
# @title Unzip and make data folders
!mkdir -p /content/data/annotations/train
!mkdir -p /content/data/annotations/test

!mkdir -p /content/data/images/train
!mkdir -p /content/data/images/dev

!cp /content/vimmsd-train.json /content/data/annotations/train.json
!cp /content/vimmsd-public-test.json /content/data/annotations/dev.json

!unzip -j /content/training-images.zip -d /content/data/images/train
!unzip -j /content/public-test-images.zip -d /content/data/images/dev

!rm /content/vimmsd-train.json
!rm /content/vimmsd-public-test.json
!rm /content/training-images.zip
!rm /content/public-test-images.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/data/images/train/20d72461a85aff40e94731e74f4270d8ab0f1ac34e1491078b834f5c38dfc874.jpg  
  inflating: /content/data/images/train/6b8c9513b205bee949425dc0c89de489d34fd3bdd1ddee7bad2480e6c0dc3b3b.jpg  
  inflating: /content/data/images/train/08e1c26045e07708fa61ea27cb49ec9338fbaedb2ba530cb775da1e32151a473.jpg  
  inflating: /content/data/images/train/035b92f3b46bcade1a33fd1b04d73dc8de26c277cda718107cd668c3732c2eba.jpg  
  inflating: /content/data/images/train/5b3380db77ecde8016559530b8f1eec6103d00d1aeb8158e90416da9c81417ce.jpg  
  inflating: /content/data/images/train/b20824a454c3d7a0194f6246f2c3817d5ae51701b64bec71028cc59b8e5a9b3c.jpg  
  inflating: /content/data/images/train/d0c5cc11e61bb2ef221dffb81e29583d496fb4258d3ef8327f84c4472c8de18d.jpg  
  inflating: /content/data/images/train/236147f8af18649494c0204e3652be0881c7f61ebca3de680ad7d26993d3942f.jpg  
  inflating: /content/data/images/train/cbf3a1c

In [3]:
#@title Drive mount
from google.colab import drive

# Google Drive is attached below /content; the project modules imported later
# in the notebook are addressed through this mount point (drive.MyDrive...).
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [4]:
# @title Dependencies
import sys
import timm
import tqdm
import torch
import random
import numpy as np
from torch import nn, optim
from torch.utils.data import DataLoader
from timeit import default_timer as timer
from transformers import AutoTokenizer, AutoModel

In [5]:
# @title Personal modules
from drive.MyDrive.SharedTasks.UIT_DS_Challenge.make_dummy_data import make_dummy_data
from drive.MyDrive.SharedTasks.UIT_DS_Challenge.utilities import get_metrics, export_result
from drive.MyDrive.SharedTasks.UIT_DS_Challenge.data import load_data, get_labels, RawData, data_process

In [6]:
# @title Hyperparameters
from argparse import Namespace

# All tunable values of the run, exposed as attributes of a single Namespace.
args = Namespace(
    CLS_DROPOUT=0.15,
    EPOCHS=5,

    PLM='uitnlp/visobert',  # try multiple PLMs

    PVM='timm/resnet152.a1h_in1k',  # try multiple PVMs
    # Input width of the classifier head, so it has to equal the feature size
    # produced by the chosen PVM.
    PVM_OUTPUT_SIZE=2048,
    IMAGE_SIZE=224,

    # Separate learning rates for the vision backbone and the classifier head.
    PVM_LEARNING_RATE=1e-5,
    CLS_LEARNING_RATE=1e-4,

    CLS_SIZE=512,
    TRAIN_BATCH_SIZE=32,
    TEST_BATCH_SIZE=16,
    RANDOM_SEED=2024,

    # Use the GPU whenever one is visible to torch, otherwise fall back to CPU.
    DEVICE='cuda' if torch.cuda.is_available() else 'cpu',
)

In [7]:
# @title Set random seed
# Seed every random number generator used in the notebook with the same value:
# Python's `random`, NumPy, torch (CPU) and torch on all CUDA devices.
for seed_function in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_function(args.RANDOM_SEED)

In [8]:
# @title Load data
# The working directory is hard-coded: the author found no way to resolve it
# from inside Colab (earlier attempt: pathlib.Path().resolve()).
current_path = '/content/'

# NOTE(review): presumably prepares the 'dummy_data' set loaded below, with 1000
# as a sample count — confirm against make_dummy_data.
make_dummy_data(current_path, 1000)

train_split = load_data(current_path, 'dummy_data', 'train')
images_train_dir, images_train, annotations_train = train_split
dev_split = load_data(current_path, 'dummy_data', 'dev')
images_dev_dir, images_dev, annotations_dev = dev_split

print(
    f'Current directory: {current_path}',
    f'No of train samples: {len(images_train)}',
    f'No of dev samples: {len(images_dev)}',
    sep='\n',
)

Current directory: /content/
No of train samples: 9805
No of dev samples: 1000


In [9]:
# @title Load and create labels map
# NOTE(review): `labels` prints as a set; if get_labels assigns ids by iterating
# that set, the label <-> id mapping may differ between sessions — confirm.
labels, labels_to_ids, ids_to_labels = get_labels(annotations_train)

for caption, mapping in (('All labels', labels),
                         ('labels to ids', labels_to_ids),
                         ('ids to labels', ids_to_labels)):
    print(f'{caption}: {mapping}')

All labels: {'text-sarcasm', 'multi-sarcasm', 'image-sarcasm', 'not-sarcasm'}
labels to ids: {'text-sarcasm': 0, 'multi-sarcasm': 1, 'image-sarcasm': 2, 'not-sarcasm': 3}
ids to labels: {0: 'text-sarcasm', 1: 'multi-sarcasm', 2: 'image-sarcasm', 3: 'not-sarcasm'}


In [10]:
# @title Preprocess data and create pytorch dataloaders
trainset_raw = RawData(images_train_dir, annotations_train, labels_to_ids)
devset_raw = RawData(images_dev_dir, annotations_dev, labels_to_ids)

tokenizer = AutoTokenizer.from_pretrained(args.PLM)


def collate_batch(batch):
    '''Collate one list of dataset samples into a batch by delegating to data_process.'''
    # NOTE(review): the literal 10 is forwarded unchanged; its meaning is defined
    # by data_process — confirm and consider moving it into the hyperparameters.
    return data_process(batch, tokenizer, 10, args.IMAGE_SIZE)


# Only the train loader shuffles; both loaders share the same collate function.
trainset_dataloader = DataLoader(
    trainset_raw,
    batch_size=args.TRAIN_BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_batch,
)
devset_dataloader = DataLoader(
    devset_raw,
    batch_size=args.TEST_BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_batch,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/644 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/471k [00:00<?, ?B/s]



In [11]:
# @title Number of data samples and batches
# Sample counts come from the raw datasets, batch counts from the dataloaders;
# the trailing '\n' on the second entry keeps the blank line between the groups.
print(
    f'Number of samples in train set: {len(trainset_raw)}',
    f'Number of samples in dev set: {len(devset_raw)}\n',
    f'Number of train batches: {len(trainset_dataloader)}',
    f'Number of dev batches: {len(devset_dataloader)}',
    sep='\n',
)

Number of samples in train set: 9805
Number of samples in dev set: 1000

Number of train batches: 307
Number of dev batches: 63


In [12]:
# @title Train function
def train_step(args, vision_model, classifier, loss_function, optimizer, dataloader, print_batch=50):
    '''
    Run one training epoch: forward pass, backward pass and optimizer update for every batch.

        Parameters:
            args (Namespace): hyperparameters; only args.DEVICE is read here
            vision_model (nn.Module): pre-trained vision model
            classifier (nn.Module): classifier layers applied to the vision model output
            loss_function: callable taking (logit, labels) and returning a scalar loss tensor
            optimizer: optimizer holding the parameters to update
            dataloader: sized iterable of batches, each indexable by 'images' and 'labels'
            print_batch (int): print out train time and loss every number of print_batch;
                               0 or None disables the per-batch print
        Returns:
            time_total (float): total time to train one epoch
            loss_total (float): total train loss for one epoch
            loss_average (float): average train loss for one epoch (0.0 when the dataloader is empty)
    '''
    vision_model.train()
    classifier.train()
    loss_total, time_total = 0.0, 0.0
    for batch_index, data in enumerate(dataloader):
        time_start = timer()
        images, labels = data['images'].to(args.DEVICE), data['labels'].to(args.DEVICE)
        vision_model_output = vision_model(images)
        logit = classifier(vision_model_output)
        loss = loss_function(logit, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Accumulate a plain Python float: summing the loss tensors themselves
        # would return a tensor instead of the documented float and keep
        # autograd-tracked tensors alive for the whole epoch.
        loss_value = loss.item()
        loss_total += loss_value
        time_total += timer() - time_start
        # Honour the caller's reporting interval (previously hard-coded to 50).
        if print_batch and batch_index % print_batch == 0:
            print(f'Time to train {batch_index} batches: {time_total:.5f} secs | Loss at batch {batch_index}: {loss_value:.5f}')
    batch_count = len(dataloader)
    # Guard the division so an empty dataloader yields 0.0 instead of raising.
    loss_average = loss_total / batch_count if batch_count else 0.0
    return time_total, loss_total, loss_average

In [13]:
# @title Test function
def test_step(args, vision_model, classifier, dataloader, print_batch=30):
    '''
    Perform testing process including forward pass.

        Parameters:
            args (Namespace): hyperparameters
            vision_model (nn.Module): pre-trained vision model
            classifier (nn.Module): classifier layers
            dataloader: pytorch dataloader
            print_batch (int): print out notification every number of print_batch
        Returns:
            labels_true (list): list of true labels (int)
            labels_pred (list): list of predicted labels (int)
    '''
    labels_true, labels_pred = [], []
    vision_model.eval()
    classifier.eval()
    with torch.inference_mode():
        for batch_index, data in enumerate(dataloader):
            images, labels = data['images'].to(args.DEVICE), data['labels'].to(args.DEVICE)
            vision_model_output = vision_model(images)
            logit = classifier(vision_model_output)
            batch_labels_pred = logit.argmax(dim=1)
            labels_true.extend(labels.tolist())
            labels_pred.extend(batch_labels_pred.tolist())
            if batch_index % 30 == 0:
                print(f'Finished testing {batch_index} batches')
    return labels_true, labels_pred

In [14]:
# @title Classifier model
class ClassificationLayers(nn.Module):
    '''
    Classification head producing one logit per label from the vision model features.

        Parameters:
            args (Namespace): hyperparameters; only args.PVM_OUTPUT_SIZE is read
            labels_to_ids (dict): labels map; its length sets the number of output logits
    '''
    def __init__(self, args, labels_to_ids):
        super().__init__()
        number_of_labels = len(labels_to_ids)
        # Earlier, currently disabled head sized for PLM + PVM features:
        #   nn.Linear(args.PLM_OUTPUT_SIZE + args.PVM_OUTPUT_SIZE, args.PLM_OUTPUT_SIZE + args.CLS_SIZE),
        #   # nn.Tanh(),
        #   nn.Dropout(args.CLS_DROPOUT),
        #   nn.Linear(args.PLM_OUTPUT_SIZE + args.CLS_SIZE, args.CLS_SIZE),
        #   # nn.Tanh(),
        #   nn.Dropout(args.CLS_DROPOUT),
        #   nn.Linear(args.CLS_SIZE, len(labels_to_ids))
        projection = nn.Linear(args.PVM_OUTPUT_SIZE, number_of_labels)
        # The single layer stays wrapped in nn.Sequential so the parameter
        # names remain cls_layers.0.weight / cls_layers.0.bias.
        self.cls_layers = nn.Sequential(projection)

    def forward(self, input):
        '''Return the logits computed by the classification layers for `input`.'''
        return self.cls_layers(input)

In [15]:
# @title Load vision model and classifier model
# num_classes=0 is passed so the backbone output feeds the custom classifier
# head directly; each module is moved to the configured device on creation.
vision_model = timm.create_model(args.PVM, pretrained=True, num_classes=0).to(args.DEVICE)
classifier = ClassificationLayers(args, labels_to_ids).to(args.DEVICE)

model.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]

In [16]:
# @title Set optimizer and loss
vision_model_param = tuple(vision_model.named_parameters())
classifier_param = tuple(classifier.named_parameters())

# One Adam parameter group per module so the backbone and the classifier head
# each train with their own learning rate.
training_param = [
    {'params': [weight for _, weight in named_weights], 'lr': learning_rate}
    for named_weights, learning_rate in (
        (vision_model_param, args.PVM_LEARNING_RATE),
        (classifier_param, args.CLS_LEARNING_RATE),
    )
]

optimizer = optim.Adam(training_param)
loss_function = nn.CrossEntropyLoss()

In [None]:
# @title Execute
# Train for args.EPOCHS epochs, evaluate on the dev set after each one, and
# export the dev predictions whenever micro F1 strictly improves.
best_f1 = 0
for epoch in tqdm.trange(args.EPOCHS, file=sys.stdout):
    print(f'\n\nEpoch {epoch}:', '-----------', sep='\n')
    time_total, loss_total, loss_average = train_step(
        args, vision_model, classifier, loss_function, optimizer, trainset_dataloader, 50
    )
    print(
        '-----------',
        f'Total train time: {time_total:.5f} secs',
        f'Total loss: {loss_total:.5f}',
        f'Average loss: {loss_average:.5f}',
        '\nEvaluating.......',
        sep='\n',
    )

    labels_true, labels_pred = test_step(args, vision_model, classifier, devset_dataloader, 30)
    print('****Finished testing****')

    micro_precision, micro_recall, micro_f1, cls_report = get_metrics(labels_true, labels_pred, labels_to_ids)
    print(
        '\n[+] METRICS:',
        f'Micro precision: {micro_precision:.5f}',
        f'Micro recall: {micro_recall:.5f}',
        f'Micro F1: {micro_f1:.5f}',
        f'Classification report:\n{cls_report}',
        sep='\n',
    )

    # Strict comparison: an epoch that merely ties the best score does not
    # overwrite the previously exported predictions.
    is_new_best = micro_f1 > best_f1
    if is_new_best:
        best_f1 = micro_f1
        export_result('dev', labels_pred, ids_to_labels)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 0:
-----------
Time to train 0 batches: 2.10456 secs | Loss at batch 0: 1.40847
Time to train 50 batches: 19.01008 secs | Loss at batch 50: 0.98327
Time to train 100 batches: 37.67422 secs | Loss at batch 100: 0.93964
Time to train 150 batches: 55.75626 secs | Loss at batch 150: 0.81603
Time to train 200 batches: 73.74169 secs | Loss at batch 200: 0.74747
Time to train 250 batches: 91.82250 secs | Loss at batch 250: 0.91912
Time to train 300 batches: 109.87299 secs | Loss at batch 300: 0.93611
-----------
Total train time: 112.05090 secs
Total loss: 283.02112
Average loss: 0.92189

Evaluating.......
Finished testing 0 batches
Finished testing 30 batches
Finished testing 60 batches
****Finished testing****

[+] METRICS:
Micro precision: 0.58200
Micro recall: 0.58200
Micro F1: 0.58200
Classification report:
               precision    recall  f1-score   support

 text-sarcasm    0.00000   0.00000   0.00000         7
multi-sarcasm    0.58537  