### Installation

In [None]:
# Mount Google Drive at /content/drive so files stored there (e.g. the dataset
# directory referenced later in this notebook) are reachable from the runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install pandas_path pytorch-lightning transformers spacy ftfy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install sacremoses

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install pandas-path

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip freeze

absl-py==1.3.0
aeppl==0.0.33
aesara==2.7.9
aiohttp==3.8.3
aiosignal==1.3.1
alabaster==0.7.12
albumentations==1.2.1
altair==4.2.0
appdirs==1.4.4
arviz==0.12.1
astor==0.8.1
astropy==4.3.1
astunparse==1.6.3
async-timeout==4.0.2
atari-py==0.2.9
atomicwrites==1.4.1
attrs==22.1.0
audioread==3.0.0
autograd==1.5
Babel==2.11.0
backcall==0.2.0
beautifulsoup4==4.6.3
bleach==5.0.1
blis==0.7.9
bokeh==2.3.3
branca==0.6.0
bs4==0.0.1
CacheControl==0.12.11
cachetools==5.2.0
catalogue==2.0.8
certifi==2022.9.24
cffi==1.15.1
cftime==1.6.2
chardet==3.0.4
charset-normalizer==2.1.1
click==7.1.2
clikit==0.6.2
cloudpickle==1.5.0
cmake==3.22.6
cmdstanpy==1.0.8
colorcet==3.0.1
colorlover==0.3.0
community==1.0.0b1
confection==0.0.3
cons==0.4.5
contextlib2==0.5.5
convertdate==2.4.0
crashtest==0.3.1
crcmod==1.7
cufflinks==0.17.3
cupy-cuda11x==11.0.0
cvxopt==1.3.0
cvxpy==1.2.2
cycler==0.11.0
cymem==2.0.7
Cython==0.29.32
daft==0.0.4
dask==2022.2.1
datascience==0.17.5
db-dtypes==1.0.4
debugpy==1.0.0
decorator==4.4.2
d

In [None]:
%matplotlib inline

import json
import logging
import os
import random
import tarfile
import tempfile
import warnings
from glob import glob
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_path  # Path style access for pandas
import pytorch_lightning as pl
import torch
import torchvision
from pandas_path import path
from PIL import Image
from sklearn.metrics import f1_score, roc_auc_score
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer

**Check GPU availability**

In [None]:
# Displays True when PyTorch can use a CUDA device in this runtime.
torch.cuda.is_available()

True

In [None]:
# List the GPUs reported by the NVIDIA driver (one line per device).
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-65870b45-81ce-c2c0-8c7f-a4ad4da428bb)


In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Fri Dec  9 07:48:35 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   68C    P0    31W /  70W |   7124MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

**Check system memory (RAM)**

In [None]:
from psutil import virtual_memory

# Total system memory in (decimal) gigabytes, as reported by psutil.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the cut-off this notebook uses to call a runtime "high-RAM".
print(
    'You are using a high-RAM runtime!'
    if ram_gb >= 20
    else 'Not using a high-RAM runtime'
)

Your runtime has 54.8 gigabytes of available RAM

You are using a high-RAM runtime!


In [None]:

# Keep the notebook output readable: only WARNING-and-above records pass the
# root logger, and Python warnings are suppressed entirely.
logging.getLogger().setLevel(logging.WARNING)
warnings.filterwarnings("ignore")

In [None]:
cd /content/drive/MyDrive/DL7643/finalProject/hateful_memes

/content/drive/MyDrive/DL7643/finalProject/hateful_memes


In [None]:
ls

[0m[01;34mhateful_memes[0m/     [01;34mimage_unimodal_outputs[0m/   [01;34mtext_unimodal_outputs[0m/
hateful_memes.zip  [01;34mmodel-outputs[0m/
[01;34mhm_example_mmf[0m/    [01;34mmulti_unimodals_outputs[0m/


In [None]:
# Root folder of the extracted dataset; annotation files live directly inside it.
data_dir = Path("/content/drive/MyDrive/DL7643/finalProject/hateful_memes/hateful_memes")
print(data_dir)

# One JSON-lines annotation file per split.
train_path, dev_path, dev_unseen_path, test_path = (
    data_dir / file_name
    for file_name in (
        "train.jsonl",
        "dev_seen.jsonl",
        "dev_unseen.jsonl",
        "test_seen.jsonl",
    )
)

/content/drive/MyDrive/DL7643/finalProject/hateful_memes/hateful_memes


In [None]:
# Show the resolved location of the seen-dev annotations file.
print(dev_path)

/content/drive/MyDrive/DL7643/finalProject/hateful_memes/hateful_memes/dev_seen.jsonl


### **Build & Train Model**

In [None]:
class HMDataset(torch.utils.data.Dataset):
    """Map-style dataset over a JSON-lines annotation file of memes.

    Each row of the annotation file must provide an ``img`` entry (image path
    relative to ``image_path``); ``label`` is required when ``enforce_balance``
    is set. ``__getitem__`` additionally reads ``id`` and ``text``
    (assumed to be present in the annotation files — TODO confirm).

    Args:
        data_path: path of the ``.jsonl`` annotation file.
        image_path: directory that the ``img`` entries are relative to.
        image_transform: callable applied to the loaded RGB PIL image.
        text_transform: tokenizer-like callable applied to the meme text.
        text_seq_max_length: length every tokenized text is padded/truncated to.
        enforce_balance: when true, down-sample label 0 to the size of label 1.
        random_seed: seed for every pandas sampling step.
        dev_limit: optional cap on the number of rows kept (random subset).

    Raises:
        FileNotFoundError: an ``img`` entry does not exist on disk.
        TypeError: an ``img`` entry exists but is not a regular file.
    """

    def __init__(
        self,
        data_path,
        image_path,
        image_transform,
        text_transform,
        text_seq_max_length=100,
        enforce_balance=False,
        random_seed=0,
        dev_limit=None,
    ):
        self.df = pd.read_json(data_path, lines=True)
        self.dev_limit = dev_limit

        if enforce_balance:
            neg = self.df[self.df.label.eq(0)]
            pos = self.df[self.df.label.eq(1)]
            # DataFrame.sample takes `random_state`; keep all positives and an
            # equally sized random subset of the negatives.
            self.df = pd.concat(
                [neg.sample(pos.shape[0], random_state=random_seed), pos]
            )

        if self.dev_limit and self.df.shape[0] > self.dev_limit:
            self.df = self.df.sample(self.dev_limit, random_state=random_seed)

        self.df = self.df.reset_index(drop=True)

        # Turn the relative image names into full paths.
        image_root = Path(image_path)
        self.df["img"] = self.df["img"].map(lambda name: image_root / name)

        # `.path` is the Series accessor registered by `pandas_path`.
        exists = self.df.img.path.exists()
        if not exists.all():
            raise FileNotFoundError(
                f"{(~exists).sum()} image file(s) are missing, e.g. "
                f"{self.df.img[~exists].iloc[0]}"
            )
        is_file = self.df.img.path.is_file()
        if not is_file.all():
            raise TypeError(
                f"{(~is_file).sum()} image path(s) are not regular files, e.g. "
                f"{self.df.img[~is_file].iloc[0]}"
            )

        self.image_transform = image_transform
        self.text_transform = text_transform
        self.text_seq_max_length = text_seq_max_length

    def __len__(self):
        """Number of samples (rows) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return one sample as a dict with ``id``, ``image``, ``text`` and
        (when the annotations carry it) ``label``.

        ``text`` is a dict of per-sample tensors so that a collated batch can
        be passed to the language model as keyword arguments.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        row = self.df.iloc[idx]

        image = self.image_transform(Image.open(row["img"]).convert("RGB"))

        # NOTE(review): assumes `text_transform` follows the Hugging Face
        # tokenizer call signature — confirm if another transform is plugged in.
        encoded = self.text_transform(
            row["text"],
            max_length=self.text_seq_max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        # The tokenizer returns (1, seq_len) tensors; drop the leading axis so
        # the default collate function produces (batch, seq_len).
        text = {key: value.squeeze(0) for key, value in encoded.items()}

        sample = {"id": int(row["id"]), "image": image, "text": text}
        if "label" in self.df.columns:
            sample["label"] = torch.tensor(int(row["label"]), dtype=torch.long)
        return sample

In [None]:
class ModelConcat(torch.nn.Module):
    """Late-fusion classifier over a language model and an image model.

    The hidden state of the first token of the language model output and the
    image model output are passed through tanh / ReLU respectively,
    concatenated, projected to ``fusion_size`` and classified into
    ``num_classes``.

    Args:
        num_classes: number of output classes.
        loss_fn: callable ``loss_fn(logits, label)``; it receives the raw
            (pre-softmax) logits, which is what ``torch.nn.CrossEntropyLoss``
            expects.
        language_model: module called as ``language_model(**text)`` whose first
            output is indexed as ``[:, 0, :]``; must expose ``config.hidden_size``.
        image_model: module mapping an image batch to
            ``image_embedding_dimension`` features per sample.
        text_embedding_dimension: accepted for interface compatibility; unused
            (the text width is taken from ``language_model.config.hidden_size``).
        image_embedding_dimension: width of the image model output.
        fusion_size: width of the hidden fusion layer.
        dropout_p: dropout probability applied after the fusion layer.
        text_seq_max_length: accepted for interface compatibility; unused.
    """

    def __init__(
        self,
        num_classes,
        loss_fn,
        language_model,
        image_model,
        text_embedding_dimension,
        image_embedding_dimension,
        fusion_size,
        dropout_p,
        text_seq_max_length=30,
    ):
        super().__init__()
        self.language_model = language_model
        self.image_model = image_model
        inputs_size = (
            self.language_model.config.hidden_size + image_embedding_dimension
        )
        self.fusion_layer = torch.nn.Linear(
            in_features=inputs_size, out_features=fusion_size
        )
        self.fc_layer = torch.nn.Linear(
            in_features=fusion_size, out_features=num_classes
        )
        self.loss_fn = loss_fn
        self.dropout = torch.nn.Dropout(dropout_p)

    def forward(self, text, image, label=None):
        """Return ``(prediction, loss)``.

        ``prediction`` holds the per-class softmax probabilities, one row per
        sample; ``loss`` is ``loss_fn(logits, label)``, or ``None`` when no
        ``label`` is given.
        """
        lm_outputs = self.language_model(**text)
        last_hidden_state = lm_outputs[0]
        # Indexing the token axis already yields one vector per sample. A blanket
        # `torch.squeeze` here would also drop the batch axis for a batch of
        # one and break the concatenation below.
        text_embeddings = torch.tanh(last_hidden_state[:, 0, :])
        image_embeddings = torch.nn.functional.relu(self.image_model(image))

        concat_embeddings = torch.cat([text_embeddings, image_embeddings], dim=1)
        fuse_output = self.dropout(
            torch.nn.functional.relu(self.fusion_layer(concat_embeddings))
        )
        logits = self.fc_layer(fuse_output)
        prediction = torch.nn.functional.softmax(logits, dim=1)

        # The loss is computed on the logits: feeding probabilities to a
        # cross-entropy loss would apply softmax twice and flatten the gradients.
        loss = self.loss_fn(logits, label) if label is not None else None
        return (prediction, loss)

In [None]:
class HMModel(pl.LightningModule):
    """LightningModule that builds, trains and evaluates a ``ModelConcat``.

    Everything is configured through the ``hyperparameters`` dict. Required
    keys: ``train_path``, ``dev_path``, ``image_path``. Optional keys (with the
    defaults used below): ``embedding_dim``, ``language_model_type``,
    ``image_model_type``, ``text_seq_max_length``, ``text_embedding_dimension``,
    ``image_embedding_dimension``, ``output_path``, ``image_dim``,
    ``num_classes``, ``fusion_size``, ``dropout_p``, ``lr``, ``batch_size``,
    ``num_workers``, ``random_seed``, ``checkpoint_monitor``,
    ``checkpoint_monitor_mode``, ``early_stop_monitor``,
    ``early_stop_min_delta``, ``early_stop_patience``, ``verbose``,
    ``accumulate_grad_batches``, ``auto_select_gpus``, ``devices``,
    ``accelerator``, ``max_epochs``, ``gradient_clip_value``.

    NOTE(review): written against the pytorch-lightning 1.x API
    (``validation_epoch_end`` hook, ``auto_select_gpus`` Trainer argument).

    Raises:
        KeyError: a required hyperparameter is missing.
        ValueError: ``image_model_type`` is not one of the supported names.
    """

    def __init__(self, hyperparameters):
        for data_key in ["train_path", "dev_path", "image_path"]:
            if data_key not in hyperparameters.keys():
                raise KeyError(
                    f"{data_key} is a required hyperparameters in this model"
                )

        super(HMModel, self).__init__()
        self.hyperparameters = hyperparameters

        self.embedding_dim = self.hyperparameters.get("embedding_dim", 128)
        self.language_model_type = self.hyperparameters.get(
            "language_model_type", "bert-base-cased"
        )
        self.image_model_type = self.hyperparameters.get(
            "image_model_type", "resnet152"
        )
        # Holds the torchvision constructor; the network itself is
        # instantiated in `create_model`.
        self.image_model = self.fetch_image_model(self.image_model_type)
        self.text_seq_max_length = self.hyperparameters.get(
            "text_seq_max_length", 100
        )
        self.text_embedding_dimension = self.hyperparameters.get(
            "text_embedding_dimension", 128
        )
        self.image_embedding_dimension = self.hyperparameters.get(
            "image_embedding_dimension", self.text_embedding_dimension
        )
        self.output_path = Path(
            self.hyperparameters.get("output_path", "model_output")
        )
        # `parents=True` so a nested output path does not fail on first use.
        self.output_path.mkdir(parents=True, exist_ok=True)

        self.text_transform = self.transform_text()
        self.image_transform = self.transform_image()
        self.training_dataset = self.create_dateset("train_path")
        self.dev_dataset = self.create_dateset("dev_path")

        self.model = self.create_model()
        self.trainer_parameters = self.fetch_trainer_parameters()

    def forward(self, text, image, label=None):
        """Delegate to the wrapped ``ModelConcat``; returns ``(prediction, loss)``."""
        return self.model(text, image, label)

    def seed(self, seed):
        """Seed Python, NumPy and PyTorch (CPU and, if present, all CUDA devices)."""
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(seed)

    def transform_text(self):
        """Load the tokenizer matching ``language_model_type``.

        Tokenizers without a padding token (e.g. GPT-2 style ones) get the
        token "0" as padding token; tokenizers that already define one keep it.
        """
        language_transform = AutoTokenizer.from_pretrained(self.language_model_type)
        if language_transform.pad_token_id is None:
            language_transform.add_special_tokens({"pad_token": "0"})
        return language_transform

    def transform_image(self):
        """Build the image pipeline: resize to ``image_dim`` x ``image_dim``,
        convert to tensor, normalize with the fixed mean/std below."""
        image_dim = self.hyperparameters.get("image_dim", 224)
        image_transform = torchvision.transforms.Compose(
            [
                torchvision.transforms.Resize(size=(image_dim, image_dim)),
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize(
                    mean=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225),
                ),
            ]
        )
        return image_transform

    def create_dateset(self, dataset_key):
        """Build an ``HMDataset``.

        ``dataset_key`` is either a hyperparameter name (e.g. ``"train_path"``)
        or directly the path of an annotation file. Class balancing is enabled
        whenever the string form of ``dataset_key`` contains ``"train"``.
        """
        return HMDataset(
            data_path=self.hyperparameters.get(dataset_key, dataset_key),
            image_path=self.hyperparameters.get("image_path"),
            image_transform=self.image_transform,
            text_transform=self.text_transform,
            text_seq_max_length=self.text_seq_max_length,
            enforce_balance="train" in str(dataset_key),
        )

    def fetch_image_model(self, model_name):
        """Return the torchvision constructor registered under ``model_name``.

        Raises:
            ValueError: ``model_name`` is not a supported architecture.
        """
        image_model_map = {
            "alexnet": torchvision.models.alexnet,
            "densenet161": torchvision.models.densenet161,
            "resnet152": torchvision.models.resnet152,
            "resnext101_32x8d": torchvision.models.resnext101_32x8d,
            "shufflenet_v2_x2_0": torchvision.models.shufflenet_v2_x2_0,
            "vgg19_bn": torchvision.models.vgg19_bn,
            "wide_resnet50_2": torchvision.models.wide_resnet50_2,
            "wide_resnet101_2": torchvision.models.wide_resnet101_2,
        }
        if model_name not in image_model_map:
            raise ValueError(
                f"Unsupported image_model_type {model_name!r}; "
                f"choose one of {sorted(image_model_map)}"
            )
        return image_model_map[model_name]

    @staticmethod
    def _replace_image_head(image_model, out_features):
        """Swap the final classification layer for a fresh ``Linear`` producing
        ``out_features`` values.

        Handles heads stored as ``.fc`` (ResNet-style), as a ``Linear``
        ``.classifier`` (DenseNet-style) or as the last layer of a
        ``Sequential`` ``.classifier`` (VGG/AlexNet-style).
        """
        linear = torch.nn.Linear
        fc = getattr(image_model, "fc", None)
        classifier = getattr(image_model, "classifier", None)
        if isinstance(fc, linear):
            image_model.fc = linear(fc.in_features, out_features)
        elif isinstance(classifier, linear):
            image_model.classifier = linear(classifier.in_features, out_features)
        elif (
            isinstance(classifier, torch.nn.Sequential)
            and len(classifier) > 0
            and isinstance(classifier[-1], linear)
        ):
            classifier[-1] = linear(classifier[-1].in_features, out_features)
        else:
            raise ValueError(
                f"Cannot locate the classification head of {type(image_model).__name__}"
            )
        return image_model

    def create_model(self):
        """Instantiate the pretrained language and image models and wire them
        into a ``ModelConcat``."""
        language_model = AutoModel.from_pretrained(self.language_model_type)
        image_model = self._replace_image_head(
            self.image_model(pretrained=True),
            self.image_embedding_dimension,
        )

        return ModelConcat(
            num_classes=self.hyperparameters.get("num_classes", 2),
            loss_fn=torch.nn.CrossEntropyLoss(),
            language_model=language_model,
            image_model=image_model,
            text_embedding_dimension=self.text_embedding_dimension,
            image_embedding_dimension=self.image_embedding_dimension,
            fusion_size=self.hyperparameters.get("fusion_size", 512),
            dropout_p=self.hyperparameters.get("dropout_p", 0.1),
            text_seq_max_length=self.text_seq_max_length,
        )

    def training_step(self, batch, batch_nb):
        """Compute the training loss for one batch."""
        _, loss = self.forward(
            text=batch["text"],
            image=batch["image"],
            label=batch["label"],
        )
        return {"loss": loss}

    def validation_step(self, batch, batch_nb):
        """Compute the validation loss for one batch.

        Lightning already switches the module to eval mode for validation, so
        no explicit ``eval()`` call is needed here.
        """
        _, loss = self.forward(
            text=batch["text"],
            image=batch["image"],
            label=batch["label"],
        )
        return {"batch_val_loss": loss}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation losses and log them as
        ``avg_val_loss`` (the metric the callbacks and LR scheduler monitor)."""
        avg_loss = torch.stack(
            [output["batch_val_loss"] for output in outputs]
        ).mean()
        self.log("avg_val_loss", avg_loss, prog_bar=True)

    def configure_optimizers(self):
        """AdamW over the wrapped model plus a ReduceLROnPlateau scheduler
        driven by ``avg_val_loss``."""
        optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=self.hyperparameters.get("lr", 0.001),
        )
        scheduler = {
            "scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer),
            "reduce_on_plateau": True,
            "monitor": "avg_val_loss",
        }
        return [optimizer], [scheduler]

    def train_dataloader(self):
        """Shuffled loader over the training dataset."""
        return torch.utils.data.DataLoader(
            self.training_dataset,
            shuffle=True,
            batch_size=self.hyperparameters.get("batch_size", 5),
            num_workers=self.hyperparameters.get("num_workers", 20),
        )

    def val_dataloader(self):
        """Ordered loader over the dev dataset."""
        return torch.utils.data.DataLoader(
            self.dev_dataset,
            shuffle=False,
            batch_size=self.hyperparameters.get("batch_size", 5),
            num_workers=self.hyperparameters.get("num_workers", 20),
        )

    def fit(self):
        """Seed all RNGs, create the Trainer and train this module."""
        self.seed(self.hyperparameters.get("random_seed", 101))
        self.trainer = pl.Trainer(**self.trainer_parameters)
        self.trainer.fit(self)

    def fetch_trainer_parameters(self):
        """Assemble the keyword arguments for ``pl.Trainer``, including the
        checkpoint and early-stopping callbacks."""
        checkpoint_callback = pl.callbacks.ModelCheckpoint(
            dirpath=self.output_path,
            monitor=self.hyperparameters.get(
                "checkpoint_monitor", "avg_val_loss"
            ),
            mode=self.hyperparameters.get("checkpoint_monitor_mode", "min"),
            verbose=self.hyperparameters.get("verbose", True),
        )

        early_stop_callback = pl.callbacks.EarlyStopping(
            monitor=self.hyperparameters.get(
                "early_stop_monitor", "avg_val_loss"
            ),
            min_delta=self.hyperparameters.get("early_stop_min_delta", 0.001),
            patience=self.hyperparameters.get("early_stop_patience", 3),
            verbose=self.hyperparameters.get("verbose", True),
        )

        trainer_parameters = {
            "enable_checkpointing": True,
            "callbacks": [checkpoint_callback, early_stop_callback],
            "default_root_dir": self.output_path,
            "accumulate_grad_batches": self.hyperparameters.get(
                "accumulate_grad_batches", 1
            ),
            "auto_select_gpus": self.hyperparameters.get("auto_select_gpus", False),
            "devices": self.hyperparameters.get("devices", 1),
            "accelerator": self.hyperparameters.get("accelerator", "cpu"),
            "max_epochs": self.hyperparameters.get("max_epochs", 100),
            # Note the hyperparameter key is "gradient_clip_value".
            "gradient_clip_val": self.hyperparameters.get(
                "gradient_clip_value", 1
            ),
        }
        return trainer_parameters

    @torch.no_grad()
    def eval_accuracy(self, test_path):
        """Run the model (on CPU, in eval mode) over a labelled annotation file.

        Returns a DataFrame indexed by sample ``id`` with columns ``proba``
        (probability of class 1, float), ``label`` (predicted class, int) and
        ``target`` (ground-truth class, int).
        """
        testing_data = self.create_dateset(test_path)
        test_dataloader = torch.utils.data.DataLoader(
            testing_data,
            shuffle=False,
            batch_size=self.hyperparameters.get("batch_size", 4),
            num_workers=self.hyperparameters.get("num_workers", 16),
        )

        # Switch mode / device once instead of on every batch.
        model = self.model.eval().to("cpu")

        ids, probas, labels, targets = [], [], [], []
        for batch in tqdm(test_dataloader, total=len(test_dataloader)):
            predictions, _ = model(batch["text"], batch["image"])
            ids.extend(torch.as_tensor(batch["id"]).tolist())
            probas.extend(predictions[:, 1].tolist())
            labels.extend(predictions.argmax(dim=1).tolist())
            targets.extend(torch.as_tensor(batch["label"]).tolist())

        # Build the frame once from plain Python values rather than writing
        # tensors into it batch by batch.
        data_frame = pd.DataFrame(
            {"proba": probas, "label": labels, "target": targets},
            index=pd.Index(ids, name="id"),
            columns=["proba", "label", "target"],
        )
        return data_frame.astype({"proba": float, "label": int, "target": int})

In [None]:
def gen_accuracy(language_model_type, image_model_type):
    """Train one language/vision model pair and score it on the unseen dev split.

    Args:
        language_model_type: Hugging Face model name for the text branch.
        image_model_type: torchvision architecture name for the image branch.

    Returns:
        ``(accuracy_eval, accuracy, auc_roc, f1)`` where ``accuracy_eval`` is
        the per-sample frame returned by ``HMModel.eval_accuracy`` and the
        other three are scalar metrics computed from it.

    Raises:
        RuntimeError: training finished without saving any checkpoint.
    """
    print(language_model_type, image_model_type)
    hyperparameters = {
        # Required hyperparameters
        "train_path": train_path,
        "dev_path": dev_path,
        "image_path": data_dir,

        # Optional hyperparameters
        "embedding_dim": 150,
        "language_model_type": language_model_type,
        "image_model_type": image_model_type,
        "text_seq_max_length": 32,
        "text_embedding_dimension": 300,
        "image_embedding_dimension": 300,
        "fusion_size": 256,
        "output_path": "model_output",
        "dev_limit": None,
        "lr": 0.00005,
        "max_epochs": 10,
        "devices": 1,
        "accelerator": "gpu",
        # "auto_select_gpus": True,
        # "n_gpu": 100,
        "batch_size": 4,
        # allows us to "simulate" having larger batches
        "accumulate_grad_batches": 16,
        "early_stop_patience": 5,
    }
    hateful_memes_model = HMModel(hyperparameters=hyperparameters)
    hateful_memes_model.fit()

    # Ask the checkpoint callback for the best checkpoint of THIS run. Globbing
    # the output directory and taking the last entry returns an arbitrary file,
    # possibly one left behind by an earlier run with a different architecture.
    best_checkpoint = hateful_memes_model.trainer.checkpoint_callback.best_model_path
    if not best_checkpoint:
        raise RuntimeError("Training did not produce a checkpoint to evaluate")
    print(best_checkpoint)
    hateful_memes_model_best = HMModel.load_from_checkpoint(
        best_checkpoint,
        hyperparameters=hyperparameters,
    )
    accuracy_eval = hateful_memes_model_best.eval_accuracy(
        dev_unseen_path
    )

    target = accuracy_eval["target"]
    predicted = accuracy_eval["label"]
    accuracy = (target == predicted).mean()
    # sklearn metrics take (y_true, y_score / y_pred); AUC-ROC is computed
    # from the class-1 probability rather than the thresholded label.
    auc_roc = roc_auc_score(target, accuracy_eval["proba"])
    f1 = f1_score(target, predicted)

    return accuracy_eval, accuracy, auc_roc, f1

In [None]:
# test 1 with vision_feature_dim: 1000
# Every language model below is paired with both vision backbones, in order.
tests = [
    (language_model, vision_model)
    for language_model in (
        "distilbert-base-uncased",
        "bert-base-uncased",
        "roberta-base",
        "distilroberta-base",
        "gpt2",
        "distilgpt2",
        "albert-base-v2",
        "xlm-roberta-base",
        "squeezebert/squeezebert-uncased",
    )
    for vision_model in ("densenet161", "vgg19_bn")
]

# test 2 with vision_feature_dim: 300 
# tests = [
#     ("distilbert-base-uncased", "resnet152"),
#     ("distilbert-base-uncased", "wide_resnet101_2"),
#     ("bert-base-uncased", "densenet161"),
#     ("bert-base-uncased", "vgg19_bn"),
#     ("roberta-base", "resnet152"),
#     ("roberta-base", "wide_resnet101_2"),
#     ("distilroberta-base", "resnet152"),
#     ("distilroberta-base", "wide_resnet101_2"),
#     ("gpt2", "resnet152"),
#     ("gpt2", "wide_resnet101_2"),
#     ("distilgpt2", "resnet152"),
#     ("distilgpt2", "wide_resnet101_2"),
#     ("albert-base-v2", "resnet152"),
#     ("albert-base-v2", "wide_resnet101_2"),
#     ("xlm-roberta-base", "resnet152"),
#     ("xlm-roberta-base", "wide_resnet101_2"),
#     ("squeezebert/squeezebert-uncased", "resnet152"),
#     ("squeezebert/squeezebert-uncased", "wide_resnet101_2"),
# ]

# Train and evaluate every (language model, vision model) pair and print its metrics.
# The evaluation helper defined in this notebook is `gen_accuracy`.
for language_model_name, vision_model_name in tests:
    _, accuracy, auc_roc, f1 = gen_accuracy(language_model_name, vision_model_name)
    print("\n model: {} & {}, \n accuracy: {}, \n auc_roc: {}, \n f1 score: {}".format(language_model_name, vision_model_name, accuracy, auc_roc, f1))

distilbert-base-uncased densenet161


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.u

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.704
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.70426 (best 0.70426), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v11.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved by 0.021 >= min_delta = 0.001. New best score: 0.683
INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' reached 0.68332 (best 0.68332), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=1-step=190-v9.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' reached 0.68325 (best 0.68325), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=3-step=380-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.683. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 4, global step 475: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:03<08:20,  3.74s/it][A
  1%|▏         | 2/135 [00:04<04:02,  1.82s/it][A
  2%|▏         | 3/135 [00:04<02:39,  1.21s/it][A
  3%|▎         | 4/135 [00:05<01:59,  1.10it/s][A
  4%|▎         | 5/13


 model: distilbert-base-uncased & densenet161, 
 accuracy: 0.6018518518518519, 
 auc_roc: 0.5735261744375468, 
 f1 score: 0.46384039900249374
distilbert-base-uncased vgg19_bn


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.u

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.715
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.71484 (best 0.71484), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v11.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved by 0.022 >= min_delta = 0.001. New best score: 0.693
INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' reached 0.69292 (best 0.69292), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=1-step=190-v9.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.693. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 4, global step 475: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:05<11:21,  5.08s/it][A
  1%|▏         | 2/135 [00:05<05:21,  2.42s/it][A
  2%|▏         | 3/135 [00:06<03:27,  1.57s/it][A
  3%|▎         | 4/135 [00:06<02:33,  1.17s/it][A
  4%|▎         | 5/13


 model: distilbert-base-uncased & vgg19_bn, 
 accuracy: 0.5962962962962963, 
 auc_roc: 0.5544423684040977, 
 f1 score: 0.40437158469945356
bert-base-uncased densenet161


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.672
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.67181 (best 0.67181), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v11.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.672. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:04<10:24,  4.66s/it][A
  1%|▏         | 2/135 [00:05<05:04,  2.29s/it][A
  2%|▏   


 model: bert-base-uncased & densenet161, 
 accuracy: 0.6222222222222222, 
 auc_roc: 0.5762811557973678, 
 f1 score: 0.3892215568862275
bert-base-uncased vgg19_bn


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.685
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.68452 (best 0.68452), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v12.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.685. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:06<13:30,  6.05s/it][A
  1%|▏         | 2/135 [00:06<06:17,  2.84s/it][A
  2%|▏   


 model: bert-base-uncased & vgg19_bn, 
 accuracy: 0.6277777777777778, 
 auc_roc: 0.5871968279579121, 
 f1 score: 0.4240687679083094
roberta-base densenet161


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU avail

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.723
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.72330 (best 0.72330), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v13.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved by 0.025 >= min_delta = 0.001. New best score: 0.699
INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' reached 0.69850 (best 0.69850), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=1-step=190-v10.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.694
INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' reached 0.69373 (best 0.69373), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=3-step=380-v2.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved by 0.007 >= min_delta = 0.001. New best score: 0.687
INFO:pytorch_lightning.utilities.rank_zero:Epoch 4, global step 475: 'avg_val_loss' reached 0.68650 (best 0.68650), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=4-step=475-v3.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 5, global step 570: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 6, global step 665: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.687. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 7, global step 760: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:04<10:15,  4.59s/it][A
  1%|▏         | 2/135 [00:05<04:52,  2.20s/it][A
  2%|▏         | 3/135 [00:05<03:10,  1.44s/it][A
  3%|▎         | 4/135 [00:06<02:21,  1.08s/it][A
  4%|▎         | 5/135 [00:06<01:54,  1.14it/s][


 model: roberta-base & densenet161, 
 accuracy: 0.6222222222222222, 
 auc_roc: 0.5918796992481203, 
 f1 score: 0.47692307692307695
roberta-base vgg19_bn


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU avail

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.696
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.69625 (best 0.69625), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v13.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.696. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:06<14:14,  6.38s/it][A
  1%|▏         | 2/135 [00:07<06:47,  3.06s/it][A
  2%|▏         | 3/135 [00:07<04:23,  1.99s/it][A
  3%|▎         | 4/135 [00:08<03:13,  1.48s/it][A
  4%|▎         | 5/135 [00:09<02:35,  1.19s/it][


 model: roberta-base & vgg19_bn, 
 accuracy: 0.6148148148148148, 
 auc_roc: 0.553490990990991, 
 f1 score: 0.29729729729729726
distilroberta-base densenet161


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.685
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.68466 (best 0.68466), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v14.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved by 0.009 >= min_delta = 0.001. New best score: 0.676
INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' reached 0.67616 (best 0.67616), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=1-step=190-v10.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.676. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 4, global step 475: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:04<09:46,  4.38s/it][A
  1%|▏         | 2/135 [00:04<04:37,  2.09s/it][A
  2%|▏         | 3/135 [00:05<02:58,  1.36s/it][A
  3%|▎         | 4/135 [00:05<02:11,  1.01s/it][A
  4%|▎         | 5/135 [00:06<01:45,  1.23i


 model: distilroberta-base & densenet161, 
 accuracy: 0.5925925925925926, 
 auc_roc: 0.5632352941176471, 
 f1 score: 0.45
distilroberta-base vgg19_bn


Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.677
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.67656 (best 0.67656), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v14.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.677. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at distilroberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:05<12:32,  5.62s/it][A
  1%|▏         | 2/135 [00:06<05:51,  2.65s/it][A
  2%|▏         | 3/135 [00:06<03:44,  1.70s/it][A
  3%|▎         | 4/135 [00:07<02:45,  1.26s/it][A
  4%|▎         | 5/135 [00:07<02:13,  1.02s


 model: distilroberta-base & vgg19_bn, 
 accuracy: 0.6092592592592593, 
 auc_roc: 0.572641878669276, 
 f1 score: 0.43733333333333335
gpt2 densenet161


Downloading:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/548M [00:00<?, ?B/s]

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type                    | Params
--------------------------------------------------
0 | model | LanguageAndVisionConcat | 155 M 
--------------------------------------------------
155 M     Trainable params
0         Non-trainable params
155 M     Total params
622.493   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.707
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.70685 (best 0.70685), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v15.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.707. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e


  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:05<11:19,  5.07s/it][A
  1%|▏         | 2/135 [00:05<05:22,  2.43s/it][A
  2%|▏         | 3/135 [00:06<03:29,  1.59s/it][A
  3%|▎         | 4/135 [00:06<02:35,  1.19s/it][A
  4%|▎         | 5/135 [00:07<02:04,  1.05it/s][A
  4%|▍         | 6/135 [00:07<01:45,  1.22it/s][A
  5%|▌         | 7/135 [00:08<01:34,  1.36it/s][A
  6%|▌         | 8/135 [00:09<01:26,  1.46it/s][A
  7%|▋         | 9/135 [00:09<01:23,  1.52it/s][A
  7%|▋         | 10/135 [00:10<01:19,  1.57it/s][A
  8%|▊         | 11/135 [00:10<01:16,  1.63it/s][A
  9%|▉         | 12/135 [00:11<01:13,  1.68it/s][A
 10%|▉         | 13/135 [00:11<01:10,  1.74it/s][A
 10%|█         | 14/135 [00:12<01:08,  1.77it/s][A
 11%|█         | 15/135 [00:12<01:06,  1.80it/s][A
 12%|█▏        | 16/135 [00:13<01:06,  1.79it/s][A
 13%|█▎        | 17/135 [00:14<01:06,  1.76it/s][A
 13%|█▎        | 18/135 [00:14<01:06,  1.77it/s][A
 14%|█▍        | 19/135 [00:1


 model: gpt2 & densenet161, 
 accuracy: 0.5925925925925926, 
 auc_roc: 0.5231481481481481, 
 f1 score: 0.2857142857142857
gpt2 vgg19_bn


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type                    | Params
--------------------------------------------------
0 | model | LanguageAndVisionConcat | 270 M 
--------------------------------------------------
270 M     Trainable params
0         Non-trainable params
270 M     Total params
1,082.482 Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.713
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.71273 (best 0.71273), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v16.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.713. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e


  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:06<14:44,  6.60s/it][A
  1%|▏         | 2/135 [00:07<06:51,  3.09s/it][A
  2%|▏         | 3/135 [00:07<04:17,  1.95s/it][A
  3%|▎         | 4/135 [00:08<03:06,  1.42s/it][A
  4%|▎         | 5/135 [00:09<02:26,  1.13s/it][A
  4%|▍         | 6/135 [00:09<02:02,  1.05it/s][A
  5%|▌         | 7/135 [00:10<01:47,  1.19it/s][A
  6%|▌         | 8/135 [00:10<01:37,  1.30it/s][A
  7%|▋         | 9/135 [00:11<01:32,  1.37it/s][A
  7%|▋         | 10/135 [00:12<01:26,  1.45it/s][A
  8%|▊         | 11/135 [00:12<01:22,  1.50it/s][A
  9%|▉         | 12/135 [00:13<01:20,  1.54it/s][A
 10%|▉         | 13/135 [00:13<01:18,  1.56it/s][A
 10%|█         | 14/135 [00:14<01:17,  1.55it/s][A
 11%|█         | 15/135 [00:15<01:17,  1.54it/s][A
 12%|█▏        | 16/135 [00:15<01:16,  1.55it/s][A
 13%|█▎        | 17/135 [00:16<01:14,  1.58it/s][A
 13%|█▎        | 18/135 [00:17<01:14,  1.58it/s][A
 14%|█▍        | 19/135 [00:1


 model: gpt2 & vgg19_bn, 
 accuracy: 0.5962962962962963, 
 auc_roc: 0.532122559920929, 
 f1 score: 0.3057324840764331
distilgpt2 densenet161


Downloading:   0%|          | 0.00/762 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/353M [00:00<?, ?B/s]

Some weights of the model checkpoint at distilgpt2 were not used when initializing GPT2Model: ['lm_head.weight']
- This IS expected if you are initializing GPT2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.call

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.708
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.70781 (best 0.70781), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v17.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.708. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at distilgpt2 were not used when initializing GPT2Model: ['lm_head.weight']
- This IS expected if you are initializing GPT2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:04<10:42,  4.79s/it][A
  1%|▏         | 2/135 [00:05<04:59,  2.25s/it][A
  2%|▏         | 3/135 [00:05<03:10,  1.44s/it][A
  3%|▎         | 4/135 [00:06<02:19,  1.06s/it][A
  4%|▎         | 5/135 [00:06<01:50,  1.18it/s][A
  4%|▍         | 6/135 [00:07<01:32,  1.40it/s][A
  5%|▌         | 7/135 [00:07<01:21,  1.57it/s][A
  6%|▌         | 8/135 [00:08<01:


 model: distilgpt2 & densenet161, 
 accuracy: 0.6037037037037037, 
 auc_roc: 0.5299175382139983, 
 f1 score: 0.2569444444444445
distilgpt2 vgg19_bn


Some weights of the model checkpoint at distilgpt2 were not used when initializing GPT2Model: ['lm_head.weight']
- This IS expected if you are initializing GPT2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.call

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.705
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.70473 (best 0.70473), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v18.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.705. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at distilgpt2 were not used when initializing GPT2Model: ['lm_head.weight']
- This IS expected if you are initializing GPT2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing GPT2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:05<13:05,  5.86s/it][A
  1%|▏         | 2/135 [00:06<06:10,  2.79s/it][A
  2%|▏         | 3/135 [00:07<03:57,  1.80s/it][A
  3%|▎         | 4/135 [00:07<02:54,  1.34s/it][A
  4%|▎         | 5/135 [00:08<02:20,  1.08s/it][A
  4%|▍         | 6/135 [00:08<01:58,  1.09it/s][A
  5%|▌         | 7/135 [00:09<01:44,  1.23it/s][A
  6%|▌         | 8/135 [00:10<01:


 model: distilgpt2 & vgg19_bn, 
 accuracy: 0.6092592592592593, 
 auc_roc: 0.5462346918063105, 
 f1 score: 0.29900332225913623
albert-base-v2 densenet161


Downloading:   0%|          | 0.00/684 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/760k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/47.4M [00:00<?, ?B/s]

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.LayerNorm.weight', 'predictions.bias', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'predictions.dense.weight', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INF

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.711
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.71073 (best 0.71073), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v19.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' reached 0.71034 (best 0.71034), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=1-step=190-v11.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved by 0.018 >= min_delta = 0.001. New best score: 0.693
INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' reached 0.69267 (best 0.69267), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=2-step=285-v1.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 4, global step 475: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.693. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 5, global step 570: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.LayerNorm.weight', 'predictions.bias', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'predictions.dense.weight', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:04<09:58,  4.47s/it][A
  1%|▏         | 2/135 [00:05<04:46,  2.15s/it][A
  2%|▏         | 3/135 [00:05<03:07,  1.42s/it][A
  3%|▎         | 4/135 [00:06<02:19,  1.07s/it][A



 model: albert-base-v2 & densenet161, 
 accuracy: 0.5833333333333334, 
 auc_roc: 0.5413635715304521, 
 f1 score: 0.39353099730458224
albert-base-v2 vgg19_bn


Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.LayerNorm.weight', 'predictions.bias', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'predictions.dense.weight', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INF

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.697
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.69743 (best 0.69743), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v19.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.697. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.LayerNorm.weight', 'predictions.bias', 'predictions.decoder.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'predictions.dense.weight', 'predictions.LayerNorm.bias']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:04<11:05,  4.96s/it][A
  1%|▏         | 2/135 [00:05<05:18,  2.40s/it][A
  2%|▏         | 3/135 [00:06<03:28,  1.58s/it][A
  3%|▎         | 4/135 [00:06<02:37,  1.20s/it][A



 model: albert-base-v2 & vgg19_bn, 
 accuracy: 0.5740740740740741, 
 auc_roc: 0.5338320193680551, 
 f1 score: 0.3915343915343915
xlm-roberta-base densenet161


Downloading:   0%|          | 0.00/615 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_z

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.704
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.70432 (best 0.70432), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v20.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved by 0.015 >= min_delta = 0.001. New best score: 0.690
INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' reached 0.68962 (best 0.68962), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=1-step=190-v11.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.690. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 4, global step 475: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:09<21:45,  9.74s/it][A
  1%|▏         | 2/135 [00:10<09:50,  4.44s/it][A
  2%|▏         | 3/135 [00:11<05:59,  2.72s/it][A
  3%|▎         | 4/135 [00:11<04:11,  1.92s/it][A
  4%|▎         | 5/135 [00:12<03:12,


 model: xlm-roberta-base & densenet161, 
 accuracy: 0.5944444444444444, 
 auc_roc: 0.5725472624077156, 
 f1 score: 0.477326968973747
xlm-roberta-base vgg19_bn


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_z

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.701
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.70110 (best 0.70110), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v20.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' reached 0.70087 (best 0.70087), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=2-step=285-v2.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.701. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing XLMRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:09<21:07,  9.46s/it][A
  1%|▏         | 2/135 [00:10<09:38,  4.35s/it][A
  2%|▏         | 3/135 [00:11<05:59,  2.72s/it][A
  3%|▎         | 4/135 [00:11<04:13,  1.93s/it][A
  4%|▎         | 5/135 [00:12<03:13,


 model: xlm-roberta-base & vgg19_bn, 
 accuracy: 0.5648148148148148, 
 auc_roc: 0.5391954171512254, 
 f1 score: 0.43099273607748184
squeezebert/squeezebert-uncased densenet161


Downloading:   0%|          | 0.00/500 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/103M [00:00<?, ?B/s]

Some weights of the model checkpoint at squeezebert/squeezebert-uncased were not used when initializing SqueezeBertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing SqueezeBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SqueezeBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.703
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.70296 (best 0.70296), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v20.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved by 0.017 >= min_delta = 0.001. New best score: 0.686
INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' reached 0.68593 (best 0.68593), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=1-step=190-v12.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.686. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 4, global step 475: 'avg_val_loss' was not in top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at squeezebert/squeezebert-uncased were not used when initializing SqueezeBertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing SqueezeBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SqueezeBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:07<16:39,  7.46s/it][A
  1%|▏         | 2/135 [0


 model: squeezebert/squeezebert-uncased & densenet161, 
 accuracy: 0.6, 
 auc_roc: 0.566791586073501, 
 f1 score: 0.44329896907216493
squeezebert/squeezebert-uncased vgg19_bn


Some weights of the model checkpoint at squeezebert/squeezebert-uncased were not used when initializing SqueezeBertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing SqueezeBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SqueezeBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Metric avg_val_loss improved. New best score: 0.697
INFO:pytorch_lightning.utilities.rank_zero:Epoch 0, global step 95: 'avg_val_loss' reached 0.69701 (best 0.69701), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=0-step=95-v20.ckpt' as top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 1, global step 190: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:Epoch 2, global step 285: 'avg_val_loss' was not in top 1


Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.callbacks.early_stopping:Monitored metric avg_val_loss did not improve in the last 3 records. Best score: 0.697. Signaling Trainer to stop.
INFO:pytorch_lightning.utilities.rank_zero:Epoch 3, global step 380: 'avg_val_loss' reached 0.69658 (best 0.69658), saving model to '/content/drive/MyDrive/DL7643/finalProject/hateful_memes/model-outputs/epoch=3-step=380-v2.ckpt' as top 1


['model-outputs/epoch=0-step=95.ckpt', 'model-outputs/epoch=0-step=95-v1.ckpt', 'model-outputs/epoch=0-step=95-v2.ckpt', 'model-outputs/epoch=0-step=95-v3.ckpt', 'model-outputs/epoch=0-step=95-v4.ckpt', 'model-outputs/epoch=0-step=95-v5.ckpt', 'model-outputs/epoch=3-step=380.ckpt', 'model-outputs/epoch=4-step=475.ckpt', 'model-outputs/epoch=1-step=190.ckpt', 'model-outputs/epoch=1-step=190-v1.ckpt', 'model-outputs/epoch=0-step=95-v6.ckpt', 'model-outputs/epoch=2-step=285.ckpt', 'model-outputs/epoch=4-step=475-v1.ckpt', 'model-outputs/epoch=0-step=95-v7.ckpt', 'model-outputs/epoch=1-step=190-v2.ckpt', 'model-outputs/epoch=4-step=475-v2.ckpt', 'model-outputs/epoch=1-step=190-v3.ckpt', 'model-outputs/epoch=0-step=95-v8.ckpt', 'model-outputs/epoch=1-step=190-v4.ckpt', 'model-outputs/epoch=0-step=95-v9.ckpt', 'model-outputs/epoch=1-step=190-v5.ckpt', 'model-outputs/epoch=1-step=190-v6.ckpt', 'model-outputs/epoch=1-step=190-v7.ckpt', 'model-outputs/epoch=0-step=95-v10.ckpt', 'model-outputs/e

Some weights of the model checkpoint at squeezebert/squeezebert-uncased were not used when initializing SqueezeBertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing SqueezeBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SqueezeBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

  0%|          | 0/135 [00:00<?, ?it/s][A
  1%|          | 1/135 [00:07<17:21,  7.77s/it][A
  1%|▏         | 2/135 [0


 model: squeezebert/squeezebert-uncased & vgg19_bn, 
 accuracy: 0.5740740740740741, 
 auc_roc: 0.567534110915493, 
 f1 score: 0.4956140350877193



