In [1]:
#!/usr/bin/env python
# coding: utf-8
import os

from sklearn.model_selection import StratifiedKFold, train_test_split
from config import LSHS_DATAFILE, gpt_filtered_rephrase_lshs_file
import numpy as np
import time
import pandas as pd
from tqdm import tqdm

In [2]:
# import logging

# logging.basicConfig(
#     format='%(asctime)s %(levelname)-8s %(message)s',
#     level=logging.INFO,
#     datefmt='%Y-%m-%d %H:%M:%S')
# # The default levels are DEBUG, INFO, WARNING, ERROR, and CRITICAL.
# print(logging.WARNING)
# logging.getLogger()

In [3]:
import json
gpt_filtered_rephrase_tweets_file = gpt_filtered_rephrase_lshs_file

out_file = open(gpt_filtered_rephrase_tweets_file, "r")
filtered_rephrase_tweet_gpt = json.load(out_file)
out_file.close()

In [4]:
NUM_LABELS = 3
df = pd.read_csv(LSHS_DATAFILE)
domains = df['Domain'].unique().tolist()
for d in domains:
    print(d, df[df['Domain'] == d].shape)

Gender (9454, 4)
Religion (10869, 4)
Race (12013, 4)
Politics (11018, 4)
Sports (12306, 4)


In [5]:
# HateLabel	Final hate label decision 0-Normal, 1-Offensive, 2-Hate

# Setup for PyTorch-Lightning

In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
from torch.utils.data import DataLoader, Dataset
import torchmetrics
import torch
import pytorch_lightning as pl

  from .autonotebook import tqdm as notebook_tqdm
2023-06-19 03:50:24.133289: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-06-19 03:50:24.166407: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# https://wandb.ai/jack-morris/david-vs-goliath/reports/Does-Model-Size-Matter-A-Comparison-of-BERT-and-DistilBERT--VmlldzoxMDUxNzU
MAX_EPOCHS = 5 #5
BATCH_SIZE = 16*2 #+ int(55 * 0.9*0.5)
LEARNING_RATE = 1e-5
# MODEL_LLM = 'distilbert-base-uncased'
MODEL_LLM = 'bert-base-uncased'

# Setting the seed
pl.seed_everything(42, workers=True)

Global seed set to 42


42

In [8]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_LLM)
print("Tokenizer input max length:", tokenizer.model_max_length)
print("Tokenizer vocabulary size:", tokenizer.vocab_size)

Tokenizer input max length: 512
Tokenizer vocabulary size: 30522


In [9]:
class MyDataset(Dataset):
  def __init__(self, encodings, labels):
    self.encodings = encodings
    self.labels = labels

  def __getitem__(self, idx):
    '''
    encoding.items() -> 
      -> input_ids : [1,34, 32, 67,...]
      -> attention_mask : [1,1,1,1,1,....]
    '''
    item = {key:torch.tensor(val[idx]) for key, val in self.encodings.items()}
    item['labels'] = torch.tensor(self.labels[idx])
    return item

  def __len__(self):
    return len((self.labels))

In [10]:
class LightningModel(pl.LightningModule):
    def __init__(self, model_name_or_path, num_labels, learning_rate=LEARNING_RATE):
        super().__init__()

        self.learning_rate = learning_rate
        self.config = AutoConfig.from_pretrained(model_name_or_path, num_labels=num_labels)
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, config=self.config)

        # self.val_conf_mat = torchmetrics.ConfusionMatrix(task="multiclass", num_classes=NUM_LABELS)
        self.val_f1_macro_score = torchmetrics.classification.MulticlassF1Score(average="macro", num_classes=NUM_LABELS)
        self.val_f1_weighted_score = torchmetrics.classification.MulticlassF1Score(average="weighted", num_classes=NUM_LABELS)
        self.val_f1_non_avg_score = torchmetrics.classification.MulticlassF1Score(average="none", num_classes=NUM_LABELS)
        self.val_acc = torchmetrics.Accuracy(task="multiclass", num_classes=NUM_LABELS)
        
        # self.test_conf_mat = torchmetrics.ConfusionMatrix(task="multiclass", num_classes=NUM_LABELS)
        self.test_f1_macro_score = torchmetrics.classification.MulticlassF1Score(average="macro", num_classes=NUM_LABELS)
        self.test_f1_weighted_score = torchmetrics.classification.MulticlassF1Score(average="weighted", num_classes=NUM_LABELS)
        self.test_f1_non_avg_score = torchmetrics.classification.MulticlassF1Score(average="none", num_classes=NUM_LABELS)
        self.test_acc = torchmetrics.Accuracy(task="multiclass", num_classes=NUM_LABELS)
        
        self.metrics = {'val': [('val_f1_macro_score', self.val_f1_macro_score), ('val_f1_weighted_score', self.val_f1_weighted_score), ('val_acc', self.val_acc)],
                         'test': [('F1-Macro', self.test_f1_macro_score), ('F1-Weighted', self.test_f1_weighted_score),
                                  ('F1_Class 0', self.test_f1_non_avg_score.cpu()[0], 'test_f1_non_avg_score'), 
                                  ('F1_Class 1', self.test_f1_non_avg_score.cpu()[1], 'test_f1_non_avg_score'),
                                  ('F1_Class 2', self.test_f1_non_avg_score.cpu()[2], 'test_f1_non_avg_score'),
                                  ('Accuracy', self.test_acc)
                                 ]
                        }
        

    def forward(self, input_ids, attention_mask, labels):
        return self.model(input_ids, attention_mask=attention_mask, labels=labels)
        
    def training_step(self, batch, batch_idx):
        outputs = self(batch["input_ids"], attention_mask=batch["attention_mask"],
                       labels=batch["labels"])        
        self.log("train_loss", outputs["loss"])
        return outputs["loss"]  # this is passed to the optimizer for training

    def echo_metrics(self, key, predicted_labels, batch_labels):
        for itm in self.metrics[key]:
            if len(itm) == 2:
                mt_str, met = itm
                metric_attribute = None
            else:
                mt_str, met, metric_attribute = itm
                # print('this', mt_str, met, metric_attribute)
            met(predicted_labels, batch_labels)
            self.log(mt_str, met, prog_bar=True, metric_attribute=metric_attribute)
        
        
    
    def validation_step(self, batch, batch_idx):
        outputs = self(batch["input_ids"], attention_mask=batch["attention_mask"],
                       labels=batch["labels"])        
        self.log("val_loss", outputs["loss"], prog_bar=True)
        
        logits = outputs["logits"]
        predicted_labels = torch.argmax(logits, 1)

        self.echo_metrics('val', predicted_labels, batch["labels"])
        # print('#n here->',self.val_f1_non_avg_score(predicted_labels, batch["labels"]).cpu()[0])
        
        
    def test_step(self, batch, batch_idx):
        outputs = self(batch["input_ids"], attention_mask=batch["attention_mask"],
                       labels=batch["labels"])        
        
        logits = outputs["logits"]
        predicted_labels = torch.argmax(logits, 1)
        self.echo_metrics('test', predicted_labels, batch["labels"])

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

# Experiment

In [11]:
CF_LABEL = 0
import random
random.seed(42)

In [12]:
print(df['Label'].value_counts())
problematic_df = df[df['Label']>0]
problematic_df.shape

Label
0    44874
1     9669
2     1117
Name: count, dtype: int64


(10786, 4)

In [13]:
def get_tweets():
    gpt_counterfactual_tweets = {}
    tot = problematic_df.shape[0]
    # print(tot)
    for i in range(0, tot):
        idx = problematic_df.iloc[i].name
        if str(i) in filtered_rephrase_tweet_gpt:
            gpt_counterfactual_tweets[idx] = filtered_rephrase_tweet_gpt[str(i)]
    return gpt_counterfactual_tweets
    
gpt_counterfactual_tweets = get_tweets()

In [14]:
import config
from nltk.tokenize import TweetTokenizer
import mosestokenizer
import numpy as np

def get_offensive_words():
    _df = pd.read_csv(config.en_swear_words_datafile, index_col=0)
    
    s = np.logical_or(_df['Level of offensiveness']=='Strongest words', _df['Level of offensiveness']=='Strong words')
    # display(_df[s]['Word'].to_list())
    wd_list = _df['Word'].to_list()
    
    _df = pd.read_csv(config.en_profanity_datafile, index_col=None)
    s = _df['severity_description'] == 'Severe'
    # wd_list.extend(_df[s]['text'].to_list())
    wd_list.extend(_df['text'].to_list())
    wd_list = set(map(str.lower, wd_list))
    return wd_list

offensive_wd_list = get_offensive_words()

In [15]:
def find_phrases(tokens, phrases):
    tokens = list(map(str.lower, tokens))
    """
    Find phrases in a list of sequential tokens.
    
    Args:
        tokens (list): List of sequential tokens.
        phrases (list): List of phrases to search for.
        
    Returns:
        A list of tuples containing the start and end index of each found phrase.
    """
    found_phrases = []
    
    for i in range(len(tokens)):
        for phrase in phrases:
            if tokens[i:i+len(phrase)] == phrase:
                found_phrases.append((i, i+len(phrase)-1))
    
    return found_phrases

def offensive_lexicon_used(t):
    tk = TweetTokenizer()
    detk = mosestokenizer.MosesDetokenizer('en')
    tk = tk.tokenize(t)
    # print(tk)
    phrase_index = find_phrases(tk, list(map(str.split, offensive_wd_list)))
    return len(phrase_index)

In [16]:
def get_counterfactual_tweets(data, labels, cf_label, single_cf_per_tweet=False, cf_size_prop_to_data=1.0, only_tweets_with_offensive_lexicon=True):
    tweets = []
    cnt =0 
    for idx in data.index:
        if idx in gpt_counterfactual_tweets:
            if (not only_tweets_with_offensive_lexicon) or offensive_lexicon_used(X[idx]):
                cnt += 1
                if not single_cf_per_tweet:
                    tweets.extend(gpt_counterfactual_tweets[idx])
                else:
                    tweets.append(gpt_counterfactual_tweets[idx][0])
    print('> Total Tweets used to generate counterfactuals ' + str(cnt))
    print('> Total counterfactuals added ' + str(len(tweets)))
    k = round(cf_size_prop_to_data * len(tweets))
    
    tweets = random.sample(tweets, k=k)
    print('> Counterfactual size ' + str(k) + ' at rate ' + str(cf_size_prop_to_data))
    cf_target = k*[cf_label]
    return pd.concat([data, pd.Series(tweets)], axis=0), pd.concat([labels, pd.Series(cf_target)], axis=0)

In [17]:
only_tweets_with_offensive_lexicon = False

In [18]:
for d in domains:
    sel_df = df[df['Domain'] == d]
    print(d, sel_df.shape)
    X, y = sel_df['Tweet'], sel_df['Label'].astype(int)
    skf = StratifiedKFold(n_splits=2, random_state=None)
    for splt_idx, (train_index , test_index) in enumerate(skf.split(X, y)):
        print(splt_idx)
        X_train , X_test = X.iloc[train_index], X.iloc[test_index]
        y_train , y_test = y.iloc[train_index] , y.iloc[train_index]
        _,_ = get_counterfactual_tweets(
                X_train, y_train, cf_label=CF_LABEL, single_cf_per_tweet=True, cf_size_prop_to_data=.1, only_tweets_with_offensive_lexicon=only_tweets_with_offensive_lexicon)

Gender (9454, 4)
0
> Total Tweets used to generate counterfactuals 845
> Total counterfactuals added 845
> Counterfactual size 84 at rate 0.1
1
> Total Tweets used to generate counterfactuals 767
> Total counterfactuals added 767
> Counterfactual size 77 at rate 0.1
Religion (10869, 4)
0
> Total Tweets used to generate counterfactuals 762
> Total counterfactuals added 762
> Counterfactual size 76 at rate 0.1
1
> Total Tweets used to generate counterfactuals 771
> Total counterfactuals added 771
> Counterfactual size 77 at rate 0.1
Race (12013, 4)
0
> Total Tweets used to generate counterfactuals 570
> Total counterfactuals added 570
> Counterfactual size 57 at rate 0.1
1
> Total Tweets used to generate counterfactuals 520
> Total counterfactuals added 520
> Counterfactual size 52 at rate 0.1
Politics (11018, 4)
0
> Total Tweets used to generate counterfactuals 1001
> Total counterfactuals added 1001
> Counterfactual size 100 at rate 0.1
1
> Total Tweets used to generate counterfactuals

In [19]:
def __exp__(train_texts, train_labels, valid_texts, valid_labels, test_texts, test_labels, CF=False):
   
    if not CF:
        print('> Train samples', len(train_texts))
    else:
        print('> Train with CF samples', len(train_texts))
    
    
    train_encodings = tokenizer(list(train_texts), truncation = True, padding = True)
    valid_encodings = tokenizer(list(valid_texts), truncation = True, padding = True)
    test_encodings = tokenizer(list(test_texts), truncation = True, padding = True)

    #datasets
    train_dataset = MyDataset(train_encodings, train_labels)
    valid_dataset = MyDataset(valid_encodings, valid_labels)
    test_dataset = MyDataset(test_encodings, test_labels)
    
    #dataloaders
    bs = BATCH_SIZE
    train_loader = DataLoader(train_dataset, batch_size = bs, shuffle = True, num_workers=4)
    valid_loader = DataLoader(valid_dataset, batch_size = bs, shuffle = True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size = bs, shuffle = True, num_workers=4)
    print(len(train_loader), len(valid_loader))

    # Setting the seed
    pl.seed_everything(42, workers=True)
    lightning_model = LightningModel(MODEL_LLM, NUM_LABELS)

    trainer = pl.Trainer(
        max_epochs=MAX_EPOCHS,
        accelerator="gpu",
        devices=1,
        deterministic=True,
        # log_every_n_steps=30,
        enable_checkpointing=True,  
        logger=False
    )
    
    trainer.fit(model=lightning_model,
                train_dataloaders=train_loader,
                val_dataloaders=valid_loader)
    
    r = trainer.test(lightning_model, dataloaders=test_loader, ckpt_path="best")
    del lightning_model
    del trainer
    return r



In [20]:
def get_splits(_X, _y, test_val_split=0.5):
    skf = StratifiedKFold(n_splits=n_splits, random_state=None)

    for splt_idx, (train_index , test_index) in enumerate(skf.split(_X, _y)):
        x_train , x_test = _X.iloc[train_index], _X.iloc[test_index]
        y_train , y_test = _y.iloc[train_index] , _y.iloc[test_index]
    
        x_val, x_test, y_val, y_test = \
                    train_test_split(x_test, y_test, test_size=test_val_split) 

        train_texts = x_train.values
        train_labels = y_train.values
        
        valid_texts = x_val.values
        valid_labels = y_val.values
        
        test_texts = x_test.values
        test_labels = y_test.values
        yield splt_idx, (x_train, y_train, train_texts, train_labels, valid_texts, valid_labels, test_texts, test_labels)

def run_experiment_org(_X, _y, n_splits=2):
    out_lst = []
    for splt_idx, data_item in get_splits(_X, _y):
        # print(splt_idx)
        x_train, y_train, train_texts, train_labels, valid_texts, valid_labels, test_texts, test_labels = data_item

        org = __exp__(train_texts, train_labels, valid_texts, valid_labels, test_texts, test_labels, CF=False)

        l = [('splt_idx', splt_idx, len(test_labels)),  ('train', len(train_labels)), {'Org': org}]
        out_lst.append(l)
    return out_lst

def run_experiment_counter_factuals(_X, _y, n_splits=2, cf_size_prop_to_data=0.1):
    out_lst = []
    for splt_idx, data_item in get_splits(_X, _y):
        # print(splt_idx)
        x_train, y_train, train_texts, train_labels, valid_texts, valid_labels, test_texts, test_labels = data_item

        x_train_with_cf, y_training_with_cf = get_counterfactual_tweets(
            x_train, y_train, cf_label=CF_LABEL, single_cf_per_tweet=True, cf_size_prop_to_data=cf_size_prop_to_data, only_tweets_with_offensive_lexicon=only_tweets_with_offensive_lexicon)
        train_texts_cf = x_train_with_cf.values
        train_labels_cf = y_training_with_cf.values

        cf = __exp__(train_texts_cf, train_labels_cf, valid_texts, valid_labels, test_texts, test_labels, CF=True)
        l = [('splt_idx', splt_idx, len(test_labels)),  ('train', len(y_training_with_cf)), {'CF': cf}]
        out_lst.append(l)
    return out_lst

In [21]:
n_splits=5
cf_size_prop_to_data_lst = list(np.arange(0.1, 1.1, 0.1))
# n_splits=2
# cf_size_prop_to_data_lst = list(np.arange(0.1, 0.2, 0.1))
try:
    del X, y
except:
    pass

In [22]:
def run_now():
    complete_result = []
    out_dict = {}
    for d in domains:
        out_dict[d] = {}
        sel_df = df[df['Domain'] == d]
        print(d, sel_df.shape)
        X, y = sel_df['Tweet'], sel_df['Label'].astype(int)
        
        res_bert_lst = run_experiment_org(X, y, n_splits=n_splits)
        out_dict[d]['Org'] = res_bert_lst
        out_dict[d]['CF'] = {}
        for cf_size_prop_to_data in cf_size_prop_to_data_lst:
            res_bert_lst = run_experiment_counter_factuals(X, y, n_splits=n_splits, cf_size_prop_to_data=cf_size_prop_to_data)
            out_dict[d]['CF'][cf_size_prop_to_data] = res_bert_lst
        complete_result.append(out_dict)
        json.dump(out_dict, open('out/'+ MODEL_LLM + '-EP_'+ str(MAX_EPOCHS) + '-ft-lshd22.json', 'w'))
    return complete_result

start = time.time()
complete_result = run_now()
end = time.time()
elapsed = end - start
print(f"Time elapsed {elapsed/60:.2f} min")

Gender (9454, 4)
> Train samples 7563


Global seed set to 42


237 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 237/237 [00:57<00:00,  4.15it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.79it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.40it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.45it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.34it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.40it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.46it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.48it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.44it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 237/237 [01:01<00:00,  3.86it/s, val_loss=0.269, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1185-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1185-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.35it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9143763184547424
        F1-Macro            0.7387224435806274
       F1-Weighted          0.9135377407073975
       F1_Class 0            0.949999988079071
       F1_Class 1            0.833734929561615
       F1_Class 2           0.4324324429035187
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 7563


Global seed set to 42


237 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 237/237 [00:57<00:00,  4.12it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 17.26it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 16.88it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 16.89it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 16.82it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 16.88it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 16.90it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 16.91it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 16.93it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 237/237 [01:01<00:00,  3.84it/s, val_loss=0.245, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1185-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1185-v2.ckpt


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.937632143497467
        F1-Macro            0.7776308059692383
       F1-Weighted           0.936324417591095
       F1_Class 0           0.9613559246063232
       F1_Class 1           0.8715365529060364
       F1_Class 2                   0.5
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 7563


Global seed set to 42


237 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 237/237 [00:57<00:00,  4.11it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.30it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.70it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.69it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.48it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.49it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.49it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.50it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.48it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 237/237 [01:01<00:00,  3.83it/s, val_loss=0.134, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1185-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1185-v3.ckpt


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.73it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9534883499145508
        F1-Macro            0.8429104089736938
       F1-Weighted          0.9529786705970764
       F1_Class 0           0.9722991585731506
       F1_Class 1           0.9112709760665894
       F1_Class 2           0.6451612710952759
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 7563


Global seed set to 42


237 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 237/237 [00:57<00:00,  4.11it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 17.99it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.71it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.93it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.89it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.05it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.13it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.17it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.16it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 237/237 [01:01<00:00,  3.84it/s, val_loss=0.156, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1185-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1185-v4.ckpt


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.14it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9386892318725586
        F1-Macro            0.8085944652557373
       F1-Weighted          0.9385038614273071
       F1_Class 0            0.960770845413208
       F1_Class 1           0.8767772316932678
       F1_Class 2           0.5882353186607361
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 7564


Global seed set to 42


237 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 237/237 [00:51<00:00,  4.61it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.21it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.00it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.80it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.62it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.60it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.65it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.63it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.63it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 237/237 [00:55<00:00,  4.26it/s, val_loss=0.819, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1185-v5.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1185-v5.ckpt


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 11.36it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8222222328186035
        F1-Macro            0.6833645701408386
       F1-Weighted           0.834043025970459
       F1_Class 0           0.8756841421127319
       F1_Class 1           0.7198641896247864
       F1_Class 2           0.4545454680919647
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1315
> Total counterfactuals added 1315
> Counterfactual size 132 at rate 0.1
> Train with CF samples 7695


Global seed set to 42


241 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 241/241 [00:58<00:00,  4.11it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.35it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.83it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.02it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.93it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.06it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.10it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.19it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.20it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 241/241 [01:02<00:00,  3.87it/s, val_loss=0.280, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1205.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1205.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.53it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9101479649543762
        F1-Macro            0.7379404306411743
       F1-Weighted          0.9139195680618286
       F1_Class 0           0.9478873014450073
       F1_Class 1           0.8428571224212646
       F1_Class 2           0.42307692766189575
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1276
> Total counterfactuals added 1276
> Counterfactual size 128 at rate 0.1
> Train with CF samples 7691


Global seed set to 42


241 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 241/241 [00:58<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 17.79it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.56it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.58it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.35it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.35it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.31it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.32it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.32it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 241/241 [01:02<00:00,  3.87it/s, val_loss=0.427, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1205-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1205-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.18it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.920718789100647
        F1-Macro            0.7963625192642212
       F1-Weighted          0.9159213304519653
       F1_Class 0           0.9506903290748596
       F1_Class 1           0.8068181872367859
       F1_Class 2           0.6315789222717285
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1312
> Total counterfactuals added 1312
> Counterfactual size 131 at rate 0.1
> Train with CF samples 7694


Global seed set to 42


241 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 241/241 [00:58<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.02it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.78it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.80it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.70it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.71it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.70it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.70it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.64it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 241/241 [01:02<00:00,  3.88it/s, val_loss=0.140, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1205-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1205-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.93it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9418604373931885
        F1-Macro            0.8052777051925659
       F1-Weighted          0.9414651393890381
       F1_Class 0           0.9660429954528809
       F1_Class 1           0.8872901797294617
       F1_Class 2                 0.5625
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1289
> Total counterfactuals added 1289
> Counterfactual size 129 at rate 0.1
> Train with CF samples 7692


Global seed set to 42


241 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 241/241 [00:58<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.40it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.82it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.67it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.56it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.62it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.63it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.57it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.56it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 241/241 [01:02<00:00,  3.88it/s, val_loss=0.165, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1205-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1205-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.35it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9545454382896423
        F1-Macro            0.8022908568382263
       F1-Weighted          0.9537342190742493
       F1_Class 0           0.9728260636329651
       F1_Class 1           0.9077306985855103
       F1_Class 2           0.5263158082962036
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1256
> Total counterfactuals added 1256
> Counterfactual size 126 at rate 0.1
> Train with CF samples 7690


Global seed set to 42


241 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 241/241 [00:54<00:00,  4.46it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.58it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.20it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 19.14it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.97it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.98it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.98it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.97it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.84it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 241/241 [00:58<00:00,  4.15it/s, val_loss=1.040, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1205-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1205-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 11.51it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7925925850868225
        F1-Macro            0.6421230435371399
       F1-Weighted           0.807754635810852
       F1_Class 0           0.8511999845504761
       F1_Class 1           0.6905537247657776
       F1_Class 2           0.38461539149284363
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1315
> Total counterfactuals added 1315
> Counterfactual size 263 at rate 0.2
> Train with CF samples 7826


Global seed set to 42


245 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 245/245 [00:59<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.61it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.71it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.67it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.55it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.56it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.61it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.67it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.66it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 245/245 [01:03<00:00,  3.88it/s, val_loss=0.319, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1225.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1225.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.60it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9122621417045593
        F1-Macro            0.7551889419555664
       F1-Weighted          0.9097480177879333
       F1_Class 0           0.9485243558883667
       F1_Class 1           0.8170425891876221
       F1_Class 2                   0.5
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1276
> Total counterfactuals added 1276
> Counterfactual size 255 at rate 0.2
> Train with CF samples 7818


Global seed set to 42


245 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 245/245 [00:59<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 17.98it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.60it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.52it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.33it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.29it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.31it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.34it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.28it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 245/245 [01:03<00:00,  3.87it/s, val_loss=0.303, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1225-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1225-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.17it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9355179667472839
        F1-Macro            0.7767896056175232
       F1-Weighted          0.9344531893730164
       F1_Class 0           0.9589778184890747
       F1_Class 1           0.8713910579681396
       F1_Class 2                   0.5
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1312
> Total counterfactuals added 1312
> Counterfactual size 262 at rate 0.2
> Train with CF samples 7825


Global seed set to 42


245 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 245/245 [00:59<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.31it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.05it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.00it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.82it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.77it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.75it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.75it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.74it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 245/245 [01:03<00:00,  3.88it/s, val_loss=0.115, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1225-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1225-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.90it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9619450569152832
        F1-Macro             0.847542405128479
       F1-Weighted          0.9618885517120361
       F1_Class 0           0.9771626591682434
       F1_Class 1           0.9368932247161865
       F1_Class 2           0.6285714507102966
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1289
> Total counterfactuals added 1289
> Counterfactual size 258 at rate 0.2
> Train with CF samples 7821


Global seed set to 42


245 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 245/245 [00:59<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.78it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.14it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.94it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.68it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.68it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.67it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.69it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.68it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 245/245 [01:03<00:00,  3.88it/s, val_loss=0.152, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1225-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1225-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.31it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9471458792686462
        F1-Macro            0.7772541046142578
       F1-Weighted          0.9458719491958618
       F1_Class 0           0.9673469662666321
       F1_Class 1           0.8938271403312683
       F1_Class 2           0.47058823704719543
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1256
> Total counterfactuals added 1256
> Counterfactual size 251 at rate 0.2
> Train with CF samples 7815


Global seed set to 42


245 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 245/245 [00:54<00:00,  4.46it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.79it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.10it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.94it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.76it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.79it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.83it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.92it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.92it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 245/245 [00:59<00:00,  4.15it/s, val_loss=0.745, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1225-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1225-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 11.52it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8465608358383179
        F1-Macro            0.7060197591781616
       F1-Weighted          0.8555840253829956
       F1_Class 0            0.895865261554718
       F1_Class 1           0.7460035681724548
       F1_Class 2           0.4761904776096344
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1315
> Total counterfactuals added 1315
> Counterfactual size 395 at rate 0.30000000000000004
> Train with CF samples 7958


Global seed set to 42


249 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 249/249 [01:00<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.91it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 19.02it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.69it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.69it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.71it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.73it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.72it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 249/249 [01:04<00:00,  3.87it/s, val_loss=0.336, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1245.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1245.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.59it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9048625826835632
        F1-Macro            0.7527139782905579
       F1-Weighted          0.8996093273162842
       F1_Class 0           0.9445196390151978
       F1_Class 1           0.7842105031013489
       F1_Class 2            0.529411792755127
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1276
> Total counterfactuals added 1276
> Counterfactual size 383 at rate 0.30000000000000004
> Train with CF samples 7946


Global seed set to 42


249 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 249/249 [01:00<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.16it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.57it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.23it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.12it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.19it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.23it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.24it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.22it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 249/249 [01:04<00:00,  3.86it/s, val_loss=0.250, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1245-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1245-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.18it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9344608783721924
        F1-Macro            0.7695529460906982
       F1-Weighted          0.9337723851203918
       F1_Class 0           0.9588671326637268
       F1_Class 1           0.8697916865348816
       F1_Class 2           0.47999998927116394
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1312
> Total counterfactuals added 1312
> Counterfactual size 394 at rate 0.30000000000000004
> Train with CF samples 7957


Global seed set to 42


249 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 249/249 [01:00<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.41it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.64it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.60it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.47it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.51it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.59it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.63it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.60it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 249/249 [01:04<00:00,  3.87it/s, val_loss=0.119, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1245-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1245-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.90it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9566596150398254
        F1-Macro            0.8206529021263123
       F1-Weighted           0.956267237663269
       F1_Class 0           0.9749303460121155
       F1_Class 1           0.9245283007621765
       F1_Class 2                 0.5625
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1289
> Total counterfactuals added 1289
> Counterfactual size 387 at rate 0.30000000000000004
> Train with CF samples 7950


Global seed set to 42


249 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 249/249 [01:00<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.68it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.04it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 19.04it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.93it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.88it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.84it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.81it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.79it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 249/249 [01:04<00:00,  3.88it/s, val_loss=0.126, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1245-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1245-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.34it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9651162624359131
        F1-Macro            0.8454967737197876
       F1-Weighted          0.9641133546829224
       F1_Class 0            0.977505087852478
       F1_Class 1           0.9339853525161743
       F1_Class 2                  0.625
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1256
> Total counterfactuals added 1256
> Counterfactual size 377 at rate 0.30000000000000004
> Train with CF samples 7941


Global seed set to 42


249 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 249/249 [00:55<00:00,  4.45it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.66it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.65it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.72it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.68it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.76it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.83it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.87it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.85it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 249/249 [00:59<00:00,  4.16it/s, val_loss=1.200, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1245-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1245-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 11.38it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7788359522819519
        F1-Macro            0.6036418080329895
       F1-Weighted          0.7960091829299927
       F1_Class 0           0.8391269445419312
       F1_Class 1           0.6860841512680054
       F1_Class 2           0.2857142984867096
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1315
> Total counterfactuals added 1315
> Counterfactual size 526 at rate 0.4
> Train with CF samples 8089


Global seed set to 42


253 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 253/253 [01:01<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.52it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.70it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.48it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.38it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.49it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.54it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.59it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.57it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 253/253 [01:05<00:00,  3.87it/s, val_loss=0.332, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1265.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1265.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.51it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8932346701622009
        F1-Macro            0.7027230858802795
       F1-Weighted          0.9022436141967773
       F1_Class 0           0.9437722563743591
       F1_Class 1           0.8114558458328247
       F1_Class 2           0.3529411852359772
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1276
> Total counterfactuals added 1276
> Counterfactual size 510 at rate 0.4
> Train with CF samples 8073


Global seed set to 42


253 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 253/253 [01:01<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 17.47it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.18it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 16.90it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 16.92it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.02it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.02it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.04it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.04it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 253/253 [01:05<00:00,  3.88it/s, val_loss=0.290, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1265-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1265-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.21it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9344608783721924
        F1-Macro            0.7895814180374146
       F1-Weighted          0.9319419860839844
       F1_Class 0            0.959785521030426
       F1_Class 1           0.8534031510353088
       F1_Class 2           0.5555555820465088
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1312
> Total counterfactuals added 1312
> Counterfactual size 525 at rate 0.4
> Train with CF samples 8088


Global seed set to 42


253 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


                                                                                

Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/usr/lib/python3.10/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/usr/lib/python3.10/multiprocessing/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/usr/lib/python3.10/shutil.py", line 730, in rmtree
    onerror(os.rmdir, path, sys.exc_info())
  File "/usr/lib/python3.10/shutil.py", line 728, in rmtree
    os.rmdir(path)
OSError: [Errno 39] Directory not empty: '/tmp/pymp-fraxn0mf'


Epoch 0: 100%|████████████████████████████████| 253/253 [01:01<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.27it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.95it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.95it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.70it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.69it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.67it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.71it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.72it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 253/253 [01:05<00:00,  3.85it/s, val_loss=0.125, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1265-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1265-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.93it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9640591740608215
        F1-Macro            0.8657573461532593
       F1-Weighted           0.963338315486908
       F1_Class 0           0.9794238805770874
       F1_Class 1            0.93034827709198
       F1_Class 2                 0.6875
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1289
> Total counterfactuals added 1289
> Counterfactual size 516 at rate 0.4
> Train with CF samples 8079


Global seed set to 42


253 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 253/253 [01:01<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.54it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.06it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.77it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.52it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.51it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.55it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.61it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.61it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 253/253 [01:05<00:00,  3.85it/s, val_loss=0.155, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1265-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1265-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.36it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9534883499145508
        F1-Macro            0.8355574607849121
       F1-Weighted          0.9529853463172913
       F1_Class 0            0.971389651298523
       F1_Class 1           0.9037036895751953
       F1_Class 2           0.6315789222717285
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1256
> Total counterfactuals added 1256
> Counterfactual size 502 at rate 0.4
> Train with CF samples 8066


Global seed set to 42


253 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 253/253 [00:56<00:00,  4.45it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.37it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.06it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 19.12it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 19.05it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 19.01it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 19.00it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.98it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 253/253 [01:00<00:00,  4.15it/s, val_loss=0.776, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1265-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1265-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 11.53it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8539682626724243
        F1-Macro            0.7111126184463501
       F1-Weighted          0.8621634244918823
       F1_Class 0            0.901751697063446
       F1_Class 1           0.7553957104682922
       F1_Class 2           0.4761904776096344
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1315
> Total counterfactuals added 1315
> Counterfactual size 658 at rate 0.5
> Train with CF samples 8221


Global seed set to 42


257 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 257/257 [01:02<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.71it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.62it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.60it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.54it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.59it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.67it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.71it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.66it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 257/257 [01:06<00:00,  3.88it/s, val_loss=0.367, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1285.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1285.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.59it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8985201120376587
        F1-Macro            0.7039049863815308
       F1-Weighted          0.8941894769668579
       F1_Class 0           0.9415760636329651
       F1_Class 1                 0.78125
       F1_Class 2           0.3888888955116272
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1276
> Total counterfactuals added 1276
> Counterfactual size 638 at rate 0.5
> Train with CF samples 8201


Global seed set to 42


257 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 257/257 [01:02<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 17.65it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.33it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.09it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 16.99it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.06it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.12it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.18it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.15it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 257/257 [01:06<00:00,  3.88it/s, val_loss=0.263, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1285-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1285-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.19it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.937632143497467
        F1-Macro              0.7725830078125
       F1-Weighted          0.9371663331985474
       F1_Class 0           0.9614604711532593
       F1_Class 1            0.876288652420044
       F1_Class 2           0.47999998927116394
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1312
> Total counterfactuals added 1312
> Counterfactual size 656 at rate 0.5
> Train with CF samples 8219


Global seed set to 42


257 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 257/257 [01:02<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.61it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.65it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.64it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.54it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.59it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.62it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.60it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.51it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 257/257 [01:06<00:00,  3.87it/s, val_loss=0.125, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1285-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1285-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.84it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9640591740608215
        F1-Macro            0.8218526840209961
       F1-Weighted          0.9635214805603027
       F1_Class 0           0.9806362390518188
       F1_Class 1           0.9394673109054565
       F1_Class 2           0.5454545617103577
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1289
> Total counterfactuals added 1289
> Counterfactual size 644 at rate 0.5
> Train with CF samples 8207


Global seed set to 42


257 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 257/257 [01:02<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 20.02it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.32it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.67it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.42it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.44it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.53it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.60it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.60it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 257/257 [01:06<00:00,  3.88it/s, val_loss=0.167, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1285-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1285-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.39it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9545454382896423
        F1-Macro            0.8233063220977783
       F1-Weighted          0.9538227319717407
       F1_Class 0           0.9712722301483154
       F1_Class 1           0.9104115962982178
       F1_Class 2           0.5882353186607361
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1256
> Total counterfactuals added 1256
> Counterfactual size 628 at rate 0.5
> Train with CF samples 8192


Global seed set to 42


256 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/util.py", line 300, in _run_finalizers
    finalizer()
  File "/usr/lib/python3.10/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/usr/lib/python3.10/multiprocessing/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/usr/lib/python3.10/shutil.py", line 730, in rmtree
    onerror(os.rmdir, path, sys.exc_info())
  File "/usr/lib/python3.10/shutil.py", line 728, in rmtree
    os.rmdir(path)
OSError: [Errno 39] Directory not empty: '/tmp/pymp-zgzgya0n'
  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 256/256 [00:57<00:00,  4.44it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.65it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.26it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.91it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.72it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.78it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.85it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.90it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.91it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 256/256 [01:01<00:00,  4.16it/s, val_loss=0.825, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1280.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1280.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 11.53it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.823280394077301
        F1-Macro            0.6771970391273499
       F1-Weighted          0.8350542783737183
       F1_Class 0           0.8776305317878723
       F1_Class 1           0.7191780805587769
       F1_Class 2           0.43478259444236755
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1315
> Total counterfactuals added 1315
> Counterfactual size 789 at rate 0.6
> Train with CF samples 8352


Global seed set to 42


261 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 261/261 [01:03<00:00,  4.12it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.44it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.68it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.78it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.52it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.56it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.56it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.61it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.63it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 261/261 [01:07<00:00,  3.88it/s, val_loss=0.393, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1305.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1305.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.55it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8816067576408386
        F1-Macro             0.698369562625885
       F1-Weighted           0.87284255027771
       F1_Class 0           0.9332443475723267
       F1_Class 1           0.7118644118309021
       F1_Class 2           0.44999998807907104
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1276
> Total counterfactuals added 1276
> Counterfactual size 766 at rate 0.6
> Train with CF samples 8329


Global seed set to 42


261 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 261/261 [01:03<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 17.76it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.54it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.38it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.10it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.13it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.20it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.26it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.23it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 261/261 [01:07<00:00,  3.87it/s, val_loss=0.254, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1305-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1305-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.22it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.937632143497467
        F1-Macro            0.7731586694717407
       F1-Weighted          0.9374471306800842
       F1_Class 0           0.9613034725189209
       F1_Class 1           0.8781725764274597
       F1_Class 2           0.47999998927116394
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1312
> Total counterfactuals added 1312
> Counterfactual size 787 at rate 0.6
> Train with CF samples 8350


Global seed set to 42


261 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 261/261 [01:03<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.21it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.01it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.99it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.79it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.77it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.73it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.76it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.75it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 261/261 [01:07<00:00,  3.87it/s, val_loss=0.117, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1305-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1305-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.89it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9651162624359131
        F1-Macro            0.8254079818725586
       F1-Weighted          0.9629804491996765
       F1_Class 0            0.980663001537323
       F1_Class 1           0.9355608820915222
       F1_Class 2           0.5600000023841858
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1289
> Total counterfactuals added 1289
> Counterfactual size 773 at rate 0.6
> Train with CF samples 8336


Global seed set to 42


261 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 261/261 [01:03<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.69it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.20it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 19.12it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.78it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.74it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.74it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.78it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.78it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 261/261 [01:07<00:00,  3.89it/s, val_loss=0.156, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1305-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1305-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.43it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9492600560188293
        F1-Macro            0.7983031868934631
       F1-Weighted          0.9490726590156555
       F1_Class 0            0.969072163105011
       F1_Class 1            0.89952152967453
       F1_Class 2           0.5263158082962036
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1256
> Total counterfactuals added 1256
> Counterfactual size 754 at rate 0.6
> Train with CF samples 8318


Global seed set to 42


260 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 260/260 [00:58<00:00,  4.44it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.96it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.04it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.92it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.88it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.97it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.95it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.95it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.91it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 260/260 [01:02<00:00,  4.14it/s, val_loss=0.968, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1300.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1300.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 11.50it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8190476298332214
        F1-Macro            0.6591494083404541
       F1-Weighted          0.8313809037208557
       F1_Class 0           0.8739232420921326
       F1_Class 1           0.7189096808433533
       F1_Class 2           0.38461539149284363
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1315
> Total counterfactuals added 1315
> Counterfactual size 921 at rate 0.7000000000000001
> Train with CF samples 8484


Global seed set to 42


266 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 266/266 [01:04<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.32it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.59it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.64it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.56it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.59it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.64it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.69it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.70it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 266/266 [01:08<00:00,  3.88it/s, val_loss=0.297, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1330.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1330.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.57it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9016913175582886
        F1-Macro             0.750859260559082
       F1-Weighted          0.8957427740097046
       F1_Class 0           0.9411764740943909
       F1_Class 1           0.7780678868293762
       F1_Class 2           0.5333333611488342
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1276
> Total counterfactuals added 1276
> Counterfactual size 893 at rate 0.7000000000000001
> Train with CF samples 8456


Global seed set to 42


265 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 265/265 [01:04<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 17.94it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.19it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.27it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.06it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.09it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.14it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.20it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.19it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 265/265 [01:08<00:00,  3.86it/s, val_loss=0.465, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1325.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1325.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.19it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9090909361839294
        F1-Macro            0.7913302183151245
       F1-Weighted          0.9019079804420471
       F1_Class 0           0.9440104365348816
       F1_Class 1           0.7633135914802551
       F1_Class 2           0.6666666865348816
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1312
> Total counterfactuals added 1312
> Counterfactual size 918 at rate 0.7000000000000001
> Train with CF samples 8481


Global seed set to 42


266 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 266/266 [01:04<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.65it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.52it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.62it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.60it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.65it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.64it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.65it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.62it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 266/266 [01:08<00:00,  3.89it/s, val_loss=0.113, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1330-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1330-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.89it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9524312615394592
        F1-Macro            0.8047952651977539
       F1-Weighted          0.9496187567710876
       F1_Class 0            0.972862958908081
       F1_Class 1           0.9030612111091614
       F1_Class 2           0.5384615659713745
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1289
> Total counterfactuals added 1289
> Counterfactual size 902 at rate 0.7000000000000001
> Train with CF samples 8465


Global seed set to 42


265 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 265/265 [01:03<00:00,  4.15it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.16it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.05it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.75it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.74it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.73it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.74it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.74it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 265/265 [01:08<00:00,  3.89it/s, val_loss=0.204, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1325-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1325-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.31it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9492600560188293
        F1-Macro            0.7777407169342041
       F1-Weighted          0.9471831321716309
       F1_Class 0           0.9691275358200073
       F1_Class 1           0.8935064673423767
       F1_Class 2           0.47058823704719543
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1256
> Total counterfactuals added 1256
> Counterfactual size 879 at rate 0.7000000000000001
> Train with CF samples 8443


Global seed set to 42


264 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 264/264 [00:59<00:00,  4.40it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.97it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.20it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.93it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.71it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.78it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.83it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.88it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.85it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 264/264 [01:04<00:00,  4.11it/s, val_loss=0.756, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1320.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1320.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 11.53it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.823280394077301
        F1-Macro            0.6666807532310486
       F1-Weighted          0.8351252675056458
       F1_Class 0           0.8774394989013672
       F1_Class 1           0.7226027250289917
       F1_Class 2           0.4000000059604645
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1315
> Total counterfactuals added 1315
> Counterfactual size 1052 at rate 0.8
> Train with CF samples 8615


Global seed set to 42


270 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 270/270 [01:05<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.94it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.00it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.82it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.60it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.66it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.72it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.72it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.68it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 270/270 [01:09<00:00,  3.89it/s, val_loss=0.271, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1350.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1350.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.56it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9090909361839294
        F1-Macro            0.7291290760040283
       F1-Weighted          0.9094205498695374
       F1_Class 0           0.9492703080177307
       F1_Class 1           0.8195121884346008
       F1_Class 2           0.41860464215278625
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1276
> Total counterfactuals added 1276
> Counterfactual size 1021 at rate 0.8
> Train with CF samples 8584


Global seed set to 42


269 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 269/269 [01:05<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 17.98it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.66it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.46it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.28it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.28it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.32it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.34it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.27it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 269/269 [01:09<00:00,  3.89it/s, val_loss=0.300, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1345.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1345.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.14it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.920718789100647
        F1-Macro            0.7710381746292114
       F1-Weighted          0.9171662330627441
       F1_Class 0           0.9509933590888977
       F1_Class 1           0.8166666626930237
       F1_Class 2           0.5454545617103577
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1312
> Total counterfactuals added 1312
> Counterfactual size 1050 at rate 0.8
> Train with CF samples 8613


Global seed set to 42


270 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 270/270 [01:05<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.26it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.57it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.51it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.48it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.49it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.54it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.53it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.53it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 270/270 [01:09<00:00,  3.89it/s, val_loss=0.117, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1350-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1350-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.91it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.957716703414917
        F1-Macro            0.8247402906417847
       F1-Weighted          0.9566683769226074
       F1_Class 0            0.977319598197937
       F1_Class 1           0.9162561297416687
       F1_Class 2           0.5806451439857483
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1289
> Total counterfactuals added 1289
> Counterfactual size 1031 at rate 0.8
> Train with CF samples 8594


Global seed set to 42


269 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 269/269 [01:05<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.44it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.59it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.62it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.53it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.56it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.61it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.66it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.66it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 269/269 [01:09<00:00,  3.89it/s, val_loss=0.131, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1345-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1345-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.41it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.957716703414917
        F1-Macro            0.8264719247817993
       F1-Weighted           0.956991970539093
       F1_Class 0           0.9733059406280518
       F1_Class 1           0.9178743958473206
       F1_Class 2           0.5882353186607361
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1256
> Total counterfactuals added 1256
> Counterfactual size 1005 at rate 0.8
> Train with CF samples 8569


Global seed set to 42


268 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 268/268 [01:00<00:00,  4.40it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.67it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.76it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.67it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.59it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.69it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.79it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.86it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.84it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 268/268 [01:05<00:00,  4.12it/s, val_loss=0.756, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1340.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1340.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 11.50it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8169311881065369
        F1-Macro            0.6570069193840027
       F1-Weighted          0.8329901695251465
       F1_Class 0           0.8758782148361206
       F1_Class 1           0.7201426029205322
       F1_Class 2                  0.375
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1315
> Total counterfactuals added 1315
> Counterfactual size 1184 at rate 0.9
> Train with CF samples 8747


Global seed set to 42


274 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 274/274 [01:06<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.89it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.62it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.75it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.68it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.73it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.75it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.76it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.70it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 274/274 [01:10<00:00,  3.90it/s, val_loss=0.335, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1370.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1370.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.60it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.900634229183197
        F1-Macro            0.7225204706192017
       F1-Weighted           0.895995020866394
       F1_Class 0           0.9430122375488281
       F1_Class 1           0.7801046967506409
       F1_Class 2           0.4444444477558136
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1276
> Total counterfactuals added 1276
> Counterfactual size 1148 at rate 0.9
> Train with CF samples 8711


Global seed set to 42


273 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 273/273 [01:06<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 17.89it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 16.75it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 16.88it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 16.93it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.03it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.07it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.07it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.07it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 273/273 [01:10<00:00,  3.90it/s, val_loss=0.279, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1365.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1365.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.14it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9281184077262878
        F1-Macro            0.7439403533935547
       F1-Weighted          0.9277254343032837
       F1_Class 0           0.9589225649833679
       F1_Class 1           0.8443271517753601
       F1_Class 2           0.4285714328289032
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1312
> Total counterfactuals added 1312
> Counterfactual size 1181 at rate 0.9
> Train with CF samples 8744


Global seed set to 42


274 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 274/274 [01:06<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.23it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.26it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.43it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.44it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.49it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.52it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.54it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.54it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 274/274 [01:10<00:00,  3.88it/s, val_loss=0.123, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1370-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1370-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.86it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9640591740608215
        F1-Macro             0.867089569568634
       F1-Weighted          0.9636954069137573
       F1_Class 0           0.9785169959068298
       F1_Class 1            0.935251772403717
       F1_Class 2                 0.6875
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1289
> Total counterfactuals added 1289
> Counterfactual size 1160 at rate 0.9
> Train with CF samples 8723


Global seed set to 42


273 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 273/273 [01:06<00:00,  4.12it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.42it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.27it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.39it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.24it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.33it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.39it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.44it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.40it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 273/273 [01:10<00:00,  3.87it/s, val_loss=0.148, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1365-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1365-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.32it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9503171443939209
        F1-Macro            0.8086390495300293
       F1-Weighted          0.9498378038406372
       F1_Class 0           0.9691569805145264
       F1_Class 1           0.9012048244476318
       F1_Class 2           0.5555555820465088
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1256
> Total counterfactuals added 1256
> Counterfactual size 1130 at rate 0.9
> Train with CF samples 8694


Global seed set to 42


272 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 272/272 [01:01<00:00,  4.40it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.40it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.12it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.90it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.77it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.82it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.85it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.88it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.88it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 272/272 [01:05<00:00,  4.13it/s, val_loss=0.928, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1360.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1360.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 11.52it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7989417910575867
        F1-Macro            0.6818891763687134
       F1-Weighted          0.8136463761329651
       F1_Class 0           0.8571428656578064
       F1_Class 1            0.688524603843689
       F1_Class 2                   0.5
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1315
> Total counterfactuals added 1315
> Counterfactual size 1315 at rate 1.0
> Train with CF samples 8878


Global seed set to 42


278 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 278/278 [01:07<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.66it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.75it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.75it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.59it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.65it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.69it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.73it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.68it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 278/278 [01:11<00:00,  3.90it/s, val_loss=0.279, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1390.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1390.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.58it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8985201120376587
        F1-Macro            0.7203483581542969
       F1-Weighted          0.8954137563705444
       F1_Class 0           0.9398084878921509
       F1_Class 1           0.7888040542602539
       F1_Class 2           0.4324324429035187
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1276
> Total counterfactuals added 1276
> Counterfactual size 1276 at rate 1.0
> Train with CF samples 8839


Global seed set to 42


277 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 277/277 [01:06<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.26it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.52it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.43it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.33it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.37it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.39it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.37it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.32it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 277/277 [01:11<00:00,  3.90it/s, val_loss=0.310, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1385.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1385.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.17it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9270613193511963
        F1-Macro            0.7773939371109009
       F1-Weighted          0.9253812432289124
       F1_Class 0           0.9558823704719543
       F1_Class 1            0.837837815284729
       F1_Class 2           0.5384615659713745
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1312
> Total counterfactuals added 1312
> Counterfactual size 1312 at rate 1.0
> Train with CF samples 8875


Global seed set to 42


278 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 278/278 [01:07<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 18.28it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 17.67it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 17.50it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 17.47it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 17.52it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 17.53it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 17.56it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 17.53it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 278/278 [01:11<00:00,  3.89it/s, val_loss=0.100, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1390-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1390-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 13.88it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9640591740608215
        F1-Macro            0.8529260754585266
       F1-Weighted           0.963295042514801
       F1_Class 0           0.9793103337287903
       F1_Class 1           0.9343065619468689
       F1_Class 2           0.6451612710952759
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1289
> Total counterfactuals added 1289
> Counterfactual size 1289 at rate 1.0
> Train with CF samples 8852


Global seed set to 42


277 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 277/277 [01:07<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 19.65it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 18.55it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.60it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.53it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.55it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.62it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.65it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.59it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 277/277 [01:11<00:00,  3.87it/s, val_loss=0.152, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1385-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1385-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:01<00:00, 17.32it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9482029676437378
        F1-Macro            0.8073142766952515
       F1-Weighted          0.9480947852134705
       F1_Class 0            0.967563807964325
       F1_Class 1           0.8988234996795654
       F1_Class 2           0.5555555820465088
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1256
> Total counterfactuals added 1256
> Counterfactual size 1256 at rate 1.0
> Train with CF samples 8820


Global seed set to 42


276 30


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 276/276 [01:02<00:00,  4.40it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/30 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▋                  | 1/30 [00:00<00:01, 20.02it/s][A
Validation DataLoader 0:   7%|█▎                 | 2/30 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  10%|█▉                 | 3/30 [00:00<00:01, 18.98it/s][A
Validation DataLoader 0:  13%|██▌                | 4/30 [00:00<00:01, 18.82it/s][A
Validation DataLoader 0:  17%|███▏               | 5/30 [00:00<00:01, 18.90it/s][A
Validation DataLoader 0:  20%|███▊               | 6/30 [00:00<00:01, 18.96it/s][A
Validation DataLoader 0:  23%|████▍              | 7/30 [00:00<00:01, 18.99it/s][A
Validation DataLoader 0:  27%|█████              | 8/30 [00:00<00:01, 18.92it/s][A
Validation DataLoader 0:  30%|█████▋         

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 276/276 [01:06<00:00,  4.13it/s, val_loss=0.764, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 30/30 [00:02<00:00, 11.48it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8328042030334473
        F1-Macro            0.6714046001434326
       F1-Weighted          0.8430546522140503
       F1_Class 0            0.886310875415802
       F1_Class 1           0.7279029488563538
       F1_Class 2           0.4000000059604645
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Religion (10869, 4)
> Train samples 8695


Global seed set to 42


272 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 272/272 [00:51<00:00,  5.23it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 17.34it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.04it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.10it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 16.94it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 16.97it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 16.99it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.00it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.02it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 272/272 [00:56<00:00,  4.82it/s, val_loss=0.445, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1360-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1360-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.34it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9006439447402954
        F1-Macro             0.667048990726471
       F1-Weighted          0.8947256207466125
       F1_Class 0            0.952431857585907
       F1_Class 1           0.6244725584983826
       F1_Class 2           0.42424243688583374
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 8695


Global seed set to 42


272 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 272/272 [00:51<00:00,  5.24it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 19.49it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 19.07it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 18.75it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.56it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.60it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.65it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.65it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.62it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 272/272 [00:56<00:00,  4.83it/s, val_loss=0.239, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1360-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1360-v2.ckpt


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.22it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.935602605342865
        F1-Macro            0.7329068183898926
       F1-Weighted          0.9360449910163879
       F1_Class 0           0.9657495617866516
       F1_Class 1           0.8579710125923157
       F1_Class 2                  0.375
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 8695


Global seed set to 42


272 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 272/272 [00:52<00:00,  5.22it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 17.84it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.48it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.43it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.27it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.39it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.47it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.50it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.46it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 272/272 [00:56<00:00,  4.83it/s, val_loss=0.194, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1360-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1360-v3.ckpt


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.54it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9484820365905762
        F1-Macro            0.7362754344940186
       F1-Weighted          0.9468286037445068
       F1_Class 0           0.9714605212211609
       F1_Class 1           0.8802228569984436
       F1_Class 2           0.3571428656578064
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 8695


Global seed set to 42


272 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 272/272 [00:46<00:00,  5.81it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 19.18it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.29it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.75it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.66it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.69it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.78it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.86it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.84it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 272/272 [00:50<00:00,  5.34it/s, val_loss=0.121, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1360-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1360-v4.ckpt


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:02<00:00, 14.65it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9595215916633606
        F1-Macro            0.7514421939849854
       F1-Weighted          0.9565798044204712
       F1_Class 0           0.9803259968757629
       F1_Class 1            0.893048107624054
       F1_Class 2            0.380952388048172
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 8696


Global seed set to 42


272 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 272/272 [00:51<00:00,  5.24it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.09it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.35it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.37it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.26it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.28it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.29it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.30it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.27it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 272/272 [00:56<00:00,  4.82it/s, val_loss=1.080, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1360-v5.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1360-v5.ckpt


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.39it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7792088389396667
        F1-Macro            0.5779330134391785
       F1-Weighted          0.8044369220733643
       F1_Class 0           0.8600379824638367
       F1_Class 1            0.577464759349823
       F1_Class 2           0.29629629850387573
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1229
> Total counterfactuals added 1229
> Counterfactual size 123 at rate 0.1
> Train with CF samples 8818


Global seed set to 42


276 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 276/276 [00:52<00:00,  5.25it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 17.70it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.42it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.33it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.13it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.12it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.16it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.18it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.12it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 276/276 [00:57<00:00,  4.84it/s, val_loss=0.584, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.34it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9034038782119751
        F1-Macro            0.6859325170516968
       F1-Weighted          0.8920212984085083
       F1_Class 0           0.9498680830001831
       F1_Class 1            0.607929527759552
       F1_Class 2                   0.5
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1223
> Total counterfactuals added 1223
> Counterfactual size 122 at rate 0.1
> Train with CF samples 8817


Global seed set to 42


276 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 276/276 [00:52<00:00,  5.25it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 19.17it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.88it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 18.86it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.73it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.71it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.74it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.78it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.78it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 276/276 [00:56<00:00,  4.86it/s, val_loss=0.218, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.31it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9429622888565063
        F1-Macro            0.7478517293930054
       F1-Weighted          0.9407058358192444
       F1_Class 0           0.9693934321403503
       F1_Class 1           0.8639053106307983
       F1_Class 2           0.41025641560554504
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1234
> Total counterfactuals added 1234
> Counterfactual size 123 at rate 0.1
> Train with CF samples 8818


Global seed set to 42


276 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 276/276 [00:52<00:00,  5.25it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.38it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.06it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.75it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.56it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.57it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.63it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.67it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.63it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 276/276 [00:57<00:00,  4.84it/s, val_loss=0.190, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.53it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9466421604156494
        F1-Macro            0.7641397714614868
       F1-Weighted          0.9452054500579834
       F1_Class 0           0.9694613814353943
       F1_Class 1            0.871345043182373
       F1_Class 2           0.4516128897666931
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1222
> Total counterfactuals added 1222
> Counterfactual size 122 at rate 0.1
> Train with CF samples 8817


Global seed set to 42


276 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 276/276 [00:47<00:00,  5.81it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.38it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.60it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.81it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.78it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.77it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.76it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.81it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.81it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 276/276 [00:51<00:00,  5.32it/s, val_loss=0.110, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:02<00:00, 14.65it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9540018439292908
        F1-Macro            0.8346099853515625
       F1-Weighted          0.9520664811134338
       F1_Class 0           0.9757174253463745
       F1_Class 1           0.8614457845687866
       F1_Class 2           0.6666666865348816
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1224
> Total counterfactuals added 1224
> Counterfactual size 122 at rate 0.1
> Train with CF samples 8818


Global seed set to 42


276 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 276/276 [00:52<00:00,  5.25it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.28it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.30it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.30it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.18it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.20it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.26it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.29it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.26it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 276/276 [00:57<00:00,  4.84it/s, val_loss=1.180, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v5.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v5.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.38it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7856485843658447
        F1-Macro            0.6220453977584839
       F1-Weighted          0.8111838698387146
       F1_Class 0           0.8625712394714355
       F1_Class 1            0.591800332069397
       F1_Class 2           0.4117647111415863
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1229
> Total counterfactuals added 1229
> Counterfactual size 246 at rate 0.2
> Train with CF samples 8941


Global seed set to 42


280 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 280/280 [00:53<00:00,  5.25it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 17.60it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.01it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.09it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.05it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.06it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.05it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.03it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.02it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 280/280 [00:57<00:00,  4.84it/s, val_loss=0.493, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1400.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1400.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.35it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.90616375207901
        F1-Macro            0.6880570650100708
       F1-Weighted          0.8957405686378479
       F1_Class 0           0.9503171443939209
       F1_Class 1           0.6355932354927063
       F1_Class 2            0.47826087474823
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1223
> Total counterfactuals added 1223
> Counterfactual size 245 at rate 0.2
> Train with CF samples 8940


Global seed set to 42


280 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 280/280 [00:53<00:00,  5.25it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 19.95it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.93it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 19.01it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.84it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.83it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.81it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.78it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.76it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 280/280 [00:57<00:00,  4.84it/s, val_loss=0.249, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1400-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1400-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.20it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9374424815177917
        F1-Macro            0.7634786367416382
       F1-Weighted          0.9348281621932983
       F1_Class 0           0.9657079577445984
       F1_Class 1           0.8369230628013611
       F1_Class 2           0.4878048896789551
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1234
> Total counterfactuals added 1234
> Counterfactual size 247 at rate 0.2
> Train with CF samples 8942


Global seed set to 42


280 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 280/280 [00:53<00:00,  5.26it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.05it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.68it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.48it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.36it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.47it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.54it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.57it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.53it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 280/280 [00:58<00:00,  4.80it/s, val_loss=0.189, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1400-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1400-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.54it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9466421604156494
        F1-Macro             0.746077299118042
       F1-Weighted           0.944150447845459
       F1_Class 0           0.9697303175926208
       F1_Class 1           0.8685015439987183
       F1_Class 2           0.4000000059604645
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1222
> Total counterfactuals added 1222
> Counterfactual size 244 at rate 0.2
> Train with CF samples 8939


Global seed set to 42


280 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 280/280 [00:48<00:00,  5.81it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.73it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.03it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.79it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.72it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.80it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.80it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.82it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.83it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 280/280 [00:52<00:00,  5.31it/s, val_loss=0.117, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1400-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1400-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:02<00:00, 14.68it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9622815251350403
        F1-Macro            0.8238492012023926
       F1-Weighted          0.9602982997894287
       F1_Class 0           0.9795467257499695
       F1_Class 1           0.8994082808494568
       F1_Class 2           0.5925925970077515
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1224
> Total counterfactuals added 1224
> Counterfactual size 245 at rate 0.2
> Train with CF samples 8941


Global seed set to 42


280 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 280/280 [00:53<00:00,  5.26it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.03it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.58it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.35it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.19it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.18it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.23it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.29it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.27it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 280/280 [00:57<00:00,  4.84it/s, val_loss=0.989, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1400-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1400-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.52it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7718491554260254
        F1-Macro            0.5288134217262268
       F1-Weighted           0.796808123588562
       F1_Class 0           0.8555060625076294
       F1_Class 1           0.5709342360496521
       F1_Class 2           0.1599999964237213
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1229
> Total counterfactuals added 1229
> Counterfactual size 369 at rate 0.30000000000000004
> Train with CF samples 9064


Global seed set to 42


284 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 284/284 [00:54<00:00,  5.25it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 17.27it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 16.84it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 16.97it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 16.94it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 16.96it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 16.93it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 16.94it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 16.94it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 284/284 [00:58<00:00,  4.86it/s, val_loss=0.475, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1420.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1420.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.52it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9043238162994385
        F1-Macro             0.694391667842865
       F1-Weighted          0.8957123756408691
       F1_Class 0           0.9485411047935486
       F1_Class 1           0.6440678238868713
       F1_Class 2           0.49056604504585266
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1223
> Total counterfactuals added 1223
> Counterfactual size 367 at rate 0.30000000000000004
> Train with CF samples 9062


Global seed set to 42


284 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 284/284 [00:53<00:00,  5.29it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 19.75it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 19.32it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 18.90it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.85it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.91it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.96it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.98it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.94it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 284/284 [00:58<00:00,  4.89it/s, val_loss=0.240, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1420-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1420-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.42it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9392824172973633
        F1-Macro            0.7679743766784668
       F1-Weighted          0.9391850829124451
       F1_Class 0           0.9668725728988647
       F1_Class 1           0.8587896227836609
       F1_Class 2            0.47826087474823
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1234
> Total counterfactuals added 1234
> Counterfactual size 370 at rate 0.30000000000000004
> Train with CF samples 9065


Global seed set to 42


284 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 284/284 [00:53<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.03it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.63it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.62it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.52it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.60it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.66it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.70it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.68it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 284/284 [00:58<00:00,  4.89it/s, val_loss=0.236, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1420-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1420-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.72it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9429622888565063
        F1-Macro            0.7434365749359131
       F1-Weighted          0.9440737962722778
       F1_Class 0           0.9666854739189148
       F1_Class 1           0.8852459192276001
       F1_Class 2           0.37837839126586914
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1222
> Total counterfactuals added 1222
> Counterfactual size 367 at rate 0.30000000000000004
> Train with CF samples 9062


Global seed set to 42


284 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 284/284 [00:48<00:00,  5.87it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.65it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.31it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 18.34it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.19it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.20it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.20it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.21it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.18it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 284/284 [00:52<00:00,  5.37it/s, val_loss=0.0966, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1420-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1420-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:02<00:00, 14.86it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9622815251350403
        F1-Macro            0.8529810905456543
       F1-Weighted          0.9612276554107666
       F1_Class 0            0.980587899684906
       F1_Class 1           0.8908554315567017
       F1_Class 2                 0.6875
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1224
> Total counterfactuals added 1224
> Counterfactual size 367 at rate 0.30000000000000004
> Train with CF samples 9063


Global seed set to 42


284 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 284/284 [00:53<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.14it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.54it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.31it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.28it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.35it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.39it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.41it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.38it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 284/284 [00:57<00:00,  4.90it/s, val_loss=0.985, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1420-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1420-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.65it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7718491554260254
        F1-Macro            0.5906022191047668
       F1-Weighted          0.7988455295562744
       F1_Class 0           0.8525845408439636
       F1_Class 1           0.5743944644927979
       F1_Class 2           0.3448275923728943
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1229
> Total counterfactuals added 1229
> Counterfactual size 492 at rate 0.4
> Train with CF samples 9187


Global seed set to 42


288 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 288/288 [00:54<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.12it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.67it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.41it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.27it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.29it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.30it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.31it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.28it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 288/288 [00:58<00:00,  4.89it/s, val_loss=0.524, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1440.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1440.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.51it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9024838805198669
        F1-Macro            0.6835237145423889
       F1-Weighted          0.8977746367454529
       F1_Class 0            0.949170708656311
       F1_Class 1           0.6639004349708557
       F1_Class 2                 0.4375
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1223
> Total counterfactuals added 1223
> Counterfactual size 489 at rate 0.4
> Train with CF samples 9184


Global seed set to 42


287 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 287/287 [00:54<00:00,  5.29it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 19.79it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.83it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 18.95it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.73it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.79it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.85it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.87it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.86it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 287/287 [00:58<00:00,  4.88it/s, val_loss=0.233, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1435.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1435.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.42it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9429622888565063
        F1-Macro            0.7637273073196411
       F1-Weighted          0.9432193040847778
       F1_Class 0           0.9697986841201782
       F1_Class 1           0.8724035620689392
       F1_Class 2           0.44897958636283875
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1234
> Total counterfactuals added 1234
> Counterfactual size 494 at rate 0.4
> Train with CF samples 9189


Global seed set to 42


288 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 288/288 [00:54<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.82it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.97it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.83it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.74it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.78it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.79it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.81it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.79it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 288/288 [00:58<00:00,  4.88it/s, val_loss=0.228, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1440-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1440-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.68it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9346826076507568
        F1-Macro            0.7197585105895996
       F1-Weighted          0.9346349835395813
       F1_Class 0           0.9624999761581421
       F1_Class 1           0.8519480228424072
       F1_Class 2           0.3448275923728943
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1222
> Total counterfactuals added 1222
> Counterfactual size 489 at rate 0.4
> Train with CF samples 9184


Global seed set to 42


287 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 287/287 [00:49<00:00,  5.85it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.89it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.42it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 18.33it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.17it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.17it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.18it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.18it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.11it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 287/287 [00:53<00:00,  5.36it/s, val_loss=0.0987, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1435-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1435-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:02<00:00, 14.83it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9641214609146118
        F1-Macro            0.8566733598709106
       F1-Weighted          0.9639931321144104
       F1_Class 0           0.9825155138969421
       F1_Class 1            0.897849440574646
       F1_Class 2           0.6896551847457886
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1224
> Total counterfactuals added 1224
> Counterfactual size 490 at rate 0.4
> Train with CF samples 9186


Global seed set to 42


288 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 288/288 [00:54<00:00,  5.29it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.07it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.57it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.36it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.29it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.32it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.36it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.40it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.39it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 288/288 [00:58<00:00,  4.88it/s, val_loss=0.747, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1440-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1440-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.72it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.835326611995697
        F1-Macro            0.6714098453521729
       F1-Weighted          0.8514485359191895
       F1_Class 0           0.8994515538215637
       F1_Class 1           0.6481113433837891
       F1_Class 2           0.46666666865348816
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1229
> Total counterfactuals added 1229
> Counterfactual size 614 at rate 0.5
> Train with CF samples 9309


Global seed set to 42


291 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 291/291 [00:55<00:00,  5.28it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 17.77it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.48it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.44it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.26it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.27it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.29it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.30it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.27it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 291/291 [00:59<00:00,  4.88it/s, val_loss=0.453, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.56it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9089236259460449
        F1-Macro             0.707903265953064
       F1-Weighted          0.9030914902687073
       F1_Class 0           0.9540106654167175
       F1_Class 1           0.6612244844436646
       F1_Class 2            0.508474588394165
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1223
> Total counterfactuals added 1223
> Counterfactual size 612 at rate 0.5
> Train with CF samples 9307


Global seed set to 42


291 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 291/291 [00:55<00:00,  5.29it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 20.07it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 19.42it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 19.34it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 19.11it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 19.11it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 19.11it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 19.12it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 19.10it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 291/291 [00:59<00:00,  4.89it/s, val_loss=0.232, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.51it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.935602605342865
        F1-Macro            0.7466064095497131
       F1-Weighted          0.9363199472427368
       F1_Class 0           0.9664429426193237
       F1_Class 1           0.8502994179725647
       F1_Class 2           0.42307692766189575
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1234
> Total counterfactuals added 1234
> Counterfactual size 617 at rate 0.5
> Train with CF samples 9312


Global seed set to 42


291 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 291/291 [00:55<00:00,  5.28it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.27it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.53it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.66it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.62it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.67it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.72it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.75it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.74it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 291/291 [00:59<00:00,  4.88it/s, val_loss=0.193, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.59it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9521619081497192
        F1-Macro            0.7520452737808228
       F1-Weighted          0.9506943225860596
       F1_Class 0           0.9722838401794434
       F1_Class 1           0.8967551589012146
       F1_Class 2           0.3870967626571655
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1222
> Total counterfactuals added 1222
> Counterfactual size 611 at rate 0.5
> Train with CF samples 9306


Global seed set to 42


291 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 291/291 [00:49<00:00,  5.85it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.75it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.26it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 18.20it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.97it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.01it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.02it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.03it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.97it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 291/291 [00:54<00:00,  5.36it/s, val_loss=0.121, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:02<00:00, 14.83it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9622815251350403
        F1-Macro            0.8008086681365967
       F1-Weighted          0.9594606757164001
       F1_Class 0           0.9795240759849548
       F1_Class 1           0.9011628031730652
       F1_Class 2            0.52173912525177
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1224
> Total counterfactuals added 1224
> Counterfactual size 612 at rate 0.5
> Train with CF samples 9308


Global seed set to 42


291 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 291/291 [00:55<00:00,  5.29it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.36it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.88it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.76it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.56it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.55it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.54it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.55it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.52it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 291/291 [00:59<00:00,  4.88it/s, val_loss=0.972, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.68it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8178473114967346
        F1-Macro            0.6813612580299377
       F1-Weighted          0.8379477858543396
       F1_Class 0           0.8849009871482849
       F1_Class 1           0.6297709941864014
       F1_Class 2            0.529411792755127
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1229
> Total counterfactuals added 1229
> Counterfactual size 737 at rate 0.6
> Train with CF samples 9432


Global seed set to 42


295 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 295/295 [00:55<00:00,  5.29it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 17.64it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.42it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.24it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.17it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.19it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.19it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.22it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.21it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 295/295 [01:00<00:00,  4.88it/s, val_loss=0.426, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1475.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1475.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.55it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9098436236381531
        F1-Macro            0.7022514939308167
       F1-Weighted          0.9056103825569153
       F1_Class 0            0.95320063829422
       F1_Class 1           0.6920152306556702
       F1_Class 2           0.4615384638309479
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1223
> Total counterfactuals added 1223
> Counterfactual size 734 at rate 0.6
> Train with CF samples 9429


Global seed set to 42


295 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 295/295 [00:55<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 19.97it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 19.38it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 19.30it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 19.11it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 19.13it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 19.13it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 19.15it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 19.12it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 295/295 [00:59<00:00,  4.93it/s, val_loss=0.292, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1475-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1475-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.48it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.929162859916687
        F1-Macro            0.7682774066925049
       F1-Weighted          0.9245316982269287
       F1_Class 0           0.9600437879562378
       F1_Class 1           0.7947883009910583
       F1_Class 2            0.550000011920929
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1234
> Total counterfactuals added 1234
> Counterfactual size 740 at rate 0.6
> Train with CF samples 9435


Global seed set to 42


295 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 295/295 [00:55<00:00,  5.29it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.36it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.02it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.99it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.89it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.91it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.94it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.93it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.86it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 295/295 [01:00<00:00,  4.90it/s, val_loss=0.172, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1475-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1475-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.73it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9512419700622559
        F1-Macro            0.7395778894424438
       F1-Weighted          0.9492732286453247
       F1_Class 0           0.9727019667625427
       F1_Class 1           0.8888888955116272
       F1_Class 2           0.3571428656578064
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1222
> Total counterfactuals added 1222
> Counterfactual size 733 at rate 0.6
> Train with CF samples 9428


Global seed set to 42


295 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 295/295 [00:50<00:00,  5.87it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 19.08it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.53it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 18.31it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.08it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.08it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.10it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.11it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.07it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 295/295 [00:54<00:00,  5.40it/s, val_loss=0.122, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1475-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1475-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:02<00:00, 14.80it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9503219723701477
        F1-Macro            0.7646965384483337
       F1-Weighted          0.9467483162879944
       F1_Class 0           0.9736553430557251
       F1_Class 1           0.8588957190513611
       F1_Class 2           0.4615384638309479
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1224
> Total counterfactuals added 1224
> Counterfactual size 734 at rate 0.6
> Train with CF samples 9430


Global seed set to 42


295 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 295/295 [00:55<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 17.29it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.35it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.44it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.37it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.39it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.40it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.43it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.40it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 295/295 [01:00<00:00,  4.89it/s, val_loss=1.010, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1475-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1475-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.65it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7976080775260925
        F1-Macro            0.6290221214294434
       F1-Weighted           0.820583701133728
       F1_Class 0           0.8721804618835449
       F1_Class 1           0.6010928750038147
       F1_Class 2           0.4137931168079376
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1229
> Total counterfactuals added 1229
> Counterfactual size 860 at rate 0.7000000000000001
> Train with CF samples 9555


Global seed set to 42


299 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 299/299 [00:56<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.00it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.22it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.23it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.13it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.19it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.24it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.26it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.22it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 299/299 [01:01<00:00,  4.90it/s, val_loss=0.538, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1495.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1495.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.50it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.893284261226654
        F1-Macro            0.6434520483016968
       F1-Weighted          0.8841068744659424
       F1_Class 0           0.9500531554222107
       F1_Class 1           0.5636363625526428
       F1_Class 2           0.4166666567325592
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1223
> Total counterfactuals added 1223
> Counterfactual size 856 at rate 0.7000000000000001
> Train with CF samples 9551


Global seed set to 42


299 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 299/299 [00:56<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 19.90it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.93it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.96it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.99it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 19.05it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 19.04it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 299/299 [01:00<00:00,  4.91it/s, val_loss=0.225, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1495-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1495-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.46it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9438822269439697
        F1-Macro            0.7814850807189941
       F1-Weighted          0.9426829814910889
       F1_Class 0           0.9696629047393799
       F1_Class 1           0.8619718551635742
       F1_Class 2           0.5128205418586731
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1234
> Total counterfactuals added 1234
> Counterfactual size 864 at rate 0.7000000000000001
> Train with CF samples 9559


Global seed set to 42


299 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 299/299 [00:56<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.46it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.63it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.72it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.62it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.70it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.74it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.77it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.78it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 299/299 [01:01<00:00,  4.88it/s, val_loss=0.215, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1495-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1495-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.69it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9420422911643982
        F1-Macro            0.7664583325386047
       F1-Weighted          0.9396494626998901
       F1_Class 0           0.9663169384002686
       F1_Class 1           0.8502994179725647
       F1_Class 2           0.48275861144065857
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1222
> Total counterfactuals added 1222
> Counterfactual size 855 at rate 0.7000000000000001
> Train with CF samples 9550


Global seed set to 42


299 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 299/299 [00:50<00:00,  5.87it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.73it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.37it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 18.36it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.21it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.21it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.21it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.22it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.20it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 299/299 [00:55<00:00,  5.40it/s, val_loss=0.116, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1495-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1495-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:02<00:00, 14.86it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9622815251350403
        F1-Macro            0.7796486616134644
       F1-Weighted          0.9590064287185669
       F1_Class 0           0.9795013666152954
       F1_Class 1           0.9048991203308105
       F1_Class 2           0.4545454680919647
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1224
> Total counterfactuals added 1224
> Counterfactual size 857 at rate 0.7000000000000001
> Train with CF samples 9553


Global seed set to 42


299 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 299/299 [00:56<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.43it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.74it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.49it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.40it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.42it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.45it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.46it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.44it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 299/299 [01:00<00:00,  4.91it/s, val_loss=0.914, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1495-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1495-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.72it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7948482036590576
        F1-Macro            0.5175009965896606
       F1-Weighted          0.8143472671508789
       F1_Class 0           0.8735920190811157
       F1_Class 1           0.5989110469818115
       F1_Class 2           0.07999999821186066
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1229
> Total counterfactuals added 1229
> Counterfactual size 983 at rate 0.8
> Train with CF samples 9678


Global seed set to 42


303 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 303/303 [00:57<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 17.90it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.60it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.39it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.29it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.31it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.34it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.34it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.32it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 303/303 [01:01<00:00,  4.90it/s, val_loss=0.593, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1515.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1515.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.51it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9015639424324036
        F1-Macro            0.6752861142158508
       F1-Weighted          0.8895525336265564
       F1_Class 0            0.949367105960846
       F1_Class 1           0.5964912176132202
       F1_Class 2           0.47999998927116394
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1223
> Total counterfactuals added 1223
> Counterfactual size 978 at rate 0.8
> Train with CF samples 9673


Global seed set to 42


303 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 303/303 [00:57<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 19.56it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 19.19it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 19.16it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.98it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 19.01it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 19.00it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.99it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.94it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 303/303 [01:01<00:00,  4.92it/s, val_loss=0.236, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1515-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1515-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.48it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9328426718711853
        F1-Macro            0.7648600339889526
       F1-Weighted          0.9342119693756104
       F1_Class 0            0.962590754032135
       F1_Class 1           0.8492307662963867
       F1_Class 2           0.48275861144065857
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1234
> Total counterfactuals added 1234
> Counterfactual size 987 at rate 0.8
> Train with CF samples 9682


Global seed set to 42


303 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 303/303 [00:57<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.63it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.60it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.73it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.68it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.74it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.78it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.81it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.80it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 303/303 [01:01<00:00,  4.90it/s, val_loss=0.224, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1515-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1515-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.72it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9484820365905762
        F1-Macro            0.7641523480415344
       F1-Weighted          0.9476020336151123
       F1_Class 0           0.9698996543884277
       F1_Class 1           0.8850574493408203
       F1_Class 2                 0.4375
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1222
> Total counterfactuals added 1222
> Counterfactual size 978 at rate 0.8
> Train with CF samples 9673


Global seed set to 42


303 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 303/303 [00:51<00:00,  5.87it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.83it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.17it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.94it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.86it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.95it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.01it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.05it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.03it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 303/303 [00:56<00:00,  5.39it/s, val_loss=0.115, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1515-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1515-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:02<00:00, 14.84it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9595215916633606
        F1-Macro             0.828460693359375
       F1-Weighted          0.9576805233955383
       F1_Class 0           0.9784649610519409
       F1_Class 1            0.886227548122406
       F1_Class 2           0.6206896305084229
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1224
> Total counterfactuals added 1224
> Counterfactual size 979 at rate 0.8
> Train with CF samples 9675


Global seed set to 42


303 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 303/303 [00:57<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.01it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.45it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.34it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.27it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.30it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.31it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.34it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.31it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 303/303 [01:01<00:00,  4.89it/s, val_loss=0.862, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1515-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1515-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.58it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8206071853637695
        F1-Macro            0.6218143105506897
       F1-Weighted          0.8382726907730103
       F1_Class 0            0.88984614610672
       F1_Class 1           0.6307692527770996
       F1_Class 2           0.3448275923728943
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1229
> Total counterfactuals added 1229
> Counterfactual size 1106 at rate 0.9
> Train with CF samples 9801


Global seed set to 42


307 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 307/307 [00:57<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 17.93it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.17it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.26it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.10it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.15it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.20it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.24it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.22it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 307/307 [01:02<00:00,  4.92it/s, val_loss=0.521, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.49it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9015639424324036
        F1-Macro             0.683878481388092
       F1-Weighted          0.8887210488319397
       F1_Class 0           0.9463722109794617
       F1_Class 1           0.6052631735801697
       F1_Class 2                   0.5
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1223
> Total counterfactuals added 1223
> Counterfactual size 1101 at rate 0.9
> Train with CF samples 9796


Global seed set to 42


307 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 307/307 [00:57<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 19.30it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.53it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 18.76it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.71it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.77it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.86it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.90it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.91it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 307/307 [01:02<00:00,  4.94it/s, val_loss=0.267, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.37it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9310027360916138
        F1-Macro            0.7428693175315857
       F1-Weighted          0.9263359904289246
       F1_Class 0           0.9614961743354797
       F1_Class 1           0.8099688291549683
       F1_Class 2           0.4571428596973419
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1234
> Total counterfactuals added 1234
> Counterfactual size 1111 at rate 0.9
> Train with CF samples 9806


Global seed set to 42


307 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 307/307 [00:57<00:00,  5.31it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.10it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.51it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.67it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.60it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.66it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.71it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.74it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.75it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 307/307 [01:02<00:00,  4.92it/s, val_loss=0.194, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.75it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9503219723701477
        F1-Macro            0.7602505683898926
       F1-Weighted          0.9481934309005737
       F1_Class 0           0.9717137813568115
       F1_Class 1           0.8804664611816406
       F1_Class 2           0.4285714328289032
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1222
> Total counterfactuals added 1222
> Counterfactual size 1100 at rate 0.9
> Train with CF samples 9795


Global seed set to 42


307 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 307/307 [00:52<00:00,  5.87it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.72it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.85it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 18.00it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.94it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.00it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.03it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.05it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.03it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 307/307 [00:56<00:00,  5.41it/s, val_loss=0.106, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:02<00:00, 14.88it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9586016535758972
        F1-Macro            0.7882846593856812
       F1-Weighted          0.9554818272590637
       F1_Class 0           0.9779492616653442
       F1_Class 1            0.886904776096344
       F1_Class 2                   0.5
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1224
> Total counterfactuals added 1224
> Counterfactual size 1102 at rate 0.9
> Train with CF samples 9798


Global seed set to 42


307 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 307/307 [00:57<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 17.88it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.32it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.24it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.18it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.25it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.31it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.36it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.36it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 307/307 [01:02<00:00,  4.90it/s, val_loss=0.909, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.65it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8114075660705566
        F1-Macro             0.612336277961731
       F1-Weighted          0.8310772180557251
       F1_Class 0           0.8833746910095215
       F1_Class 1           0.6203007698059082
       F1_Class 2           0.3333333432674408
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1229
> Total counterfactuals added 1229
> Counterfactual size 1229 at rate 1.0
> Train with CF samples 9924


Global seed set to 42


311 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 311/311 [00:58<00:00,  5.30it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 17.94it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.60it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.34it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.20it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.19it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.23it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.23it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.21it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 311/311 [01:03<00:00,  4.92it/s, val_loss=0.537, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1555.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1555.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.50it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8978840708732605
        F1-Macro            0.6729545593261719
       F1-Weighted          0.8866385817527771
       F1_Class 0           0.9465891122817993
       F1_Class 1           0.5922746658325195
       F1_Class 2           0.47999998927116394
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1223
> Total counterfactuals added 1223
> Counterfactual size 1223 at rate 1.0
> Train with CF samples 9918


Global seed set to 42


310 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 310/310 [00:58<00:00,  5.29it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 19.78it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 19.22it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 19.00it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.75it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.81it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.86it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.87it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.87it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 310/310 [01:03<00:00,  4.91it/s, val_loss=0.236, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1550.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1550.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.49it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9448022246360779
        F1-Macro            0.7774251699447632
       F1-Weighted          0.9427482485771179
       F1_Class 0           0.9700000286102295
       F1_Class 1           0.8622754216194153
       F1_Class 2                   0.5
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1234
> Total counterfactuals added 1234
> Counterfactual size 1234 at rate 1.0
> Train with CF samples 9929


Global seed set to 42


311 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 311/311 [00:58<00:00,  5.31it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.54it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.87it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.75it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.65it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.70it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.72it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.74it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.72it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 311/311 [01:03<00:00,  4.92it/s, val_loss=0.203, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1555-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1555-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 17.73it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9475620985031128
        F1-Macro             0.745177149772644
       F1-Weighted          0.9487029314041138
       F1_Class 0           0.9701744318008423
       F1_Class 1           0.8969359397888184
       F1_Class 2           0.3684210479259491
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1222
> Total counterfactuals added 1222
> Counterfactual size 1222 at rate 1.0
> Train with CF samples 9917


Global seed set to 42


310 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 310/310 [00:52<00:00,  5.86it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.74it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 18.38it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 18.12it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 18.04it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 18.06it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 18.08it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 18.11it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 18.10it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 310/310 [00:57<00:00,  5.40it/s, val_loss=0.0969, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1550-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1550-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:02<00:00, 14.86it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9641214609146118
        F1-Macro            0.7865265607833862
       F1-Weighted           0.962737500667572
       F1_Class 0            0.982768177986145
       F1_Class 1           0.9101449251174927
       F1_Class 2           0.46666666865348816
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1224
> Total counterfactuals added 1224
> Counterfactual size 1224 at rate 1.0
> Train with CF samples 9920


Global seed set to 42


310 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 310/310 [00:58<00:00,  5.29it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/34 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/34 [00:00<00:01, 18.07it/s][A
Validation DataLoader 0:   6%|█                  | 2/34 [00:00<00:01, 17.76it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/34 [00:00<00:01, 17.62it/s][A
Validation DataLoader 0:  12%|██▏                | 4/34 [00:00<00:01, 17.39it/s][A
Validation DataLoader 0:  15%|██▊                | 5/34 [00:00<00:01, 17.42it/s][A
Validation DataLoader 0:  18%|███▎               | 6/34 [00:00<00:01, 17.44it/s][A
Validation DataLoader 0:  21%|███▉               | 7/34 [00:00<00:01, 17.46it/s][A
Validation DataLoader 0:  24%|████▍              | 8/34 [00:00<00:01, 17.43it/s][A
Validation DataLoader 0:  26%|█████          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 310/310 [01:03<00:00,  4.90it/s, val_loss=1.020, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1550-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1550-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 34/34 [00:01<00:00, 18.77it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8095676302909851
        F1-Macro             0.679354727268219
       F1-Weighted          0.8313503861427307
       F1_Class 0           0.8786558508872986
       F1_Class 1           0.6188679337501526
       F1_Class 2           0.5405405163764954
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Race (12013, 4)
> Train samples 9610


Global seed set to 42


301 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 301/301 [01:12<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:01, 20.16it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:01, 19.47it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:01, 19.41it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:01, 19.19it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 19.21it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 19.23it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 19.24it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 19.22it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 301/301 [01:17<00:00,  3.90it/s, val_loss=0.324, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 14.18it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9384359121322632
        F1-Macro            0.6369830965995789
       F1-Weighted          0.9305025339126587
       F1_Class 0           0.9688644409179688
       F1_Class 1           0.7135134935379028
       F1_Class 2           0.22857142984867096
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 9610


Global seed set to 42


301 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 301/301 [01:01<00:00,  4.88it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 17.48it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 16.62it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 16.63it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 16.56it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 16.61it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 16.64it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 16.67it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 16.66it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 301/301 [01:06<00:00,  4.51it/s, val_loss=0.179, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v1.ckpt


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:03<00:00, 11.53it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9559068083763123
        F1-Macro            0.6955395936965942
       F1-Weighted          0.9482793807983398
       F1_Class 0           0.9796484708786011
       F1_Class 1           0.8212560415267944
       F1_Class 2           0.2857142984867096
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 9610


Global seed set to 42


301 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 301/301 [01:12<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.84it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.64it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.59it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.45it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.48it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.48it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.49it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.45it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 301/301 [01:17<00:00,  3.87it/s, val_loss=0.135, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v2.ckpt


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9559068083763123
        F1-Macro            0.7730183601379395
       F1-Weighted          0.9524114727973938
       F1_Class 0            0.981360673904419
       F1_Class 1            0.801980197429657
       F1_Class 2           0.5357142686843872
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 9611


Global seed set to 42


301 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 301/301 [01:12<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.61it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.44it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.34it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.28it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.29it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.31it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.31it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.28it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 301/301 [01:17<00:00,  3.87it/s, val_loss=0.162, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v3.ckpt


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 17.04it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9508742690086365
        F1-Macro            0.7781012654304504
       F1-Weighted          0.9471550583839417
       F1_Class 0           0.9812382459640503
       F1_Class 1            0.734883725643158
       F1_Class 2           0.6181818246841431
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 9611


Global seed set to 42


301 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 301/301 [01:12<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 14.47it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 14.08it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 14.08it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 14.00it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 14.05it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 14.08it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 14.11it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:02, 14.10it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 301/301 [01:18<00:00,  3.85it/s, val_loss=1.190, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v4.ckpt


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.11it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.696086585521698
        F1-Macro             0.587709903717041
       F1-Weighted          0.7519312500953674
       F1_Class 0           0.7974900007247925
       F1_Class 1           0.4100840389728546
       F1_Class 2           0.5555555820465088
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 880
> Total counterfactuals added 880
> Counterfactual size 88 at rate 0.1
> Train with CF samples 9698


Global seed set to 42


304 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 304/304 [01:13<00:00,  4.15it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:01, 19.82it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:01, 19.06it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:01, 19.10it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:01, 18.91it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 19.00it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 19.06it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 19.10it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 19.10it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 304/304 [01:17<00:00,  3.90it/s, val_loss=0.354, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1520.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1520.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 14.19it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9334442615509033
        F1-Macro            0.6328661441802979
       F1-Weighted          0.9237420558929443
       F1_Class 0           0.9652650952339172
       F1_Class 1           0.6666666865348816
       F1_Class 2           0.2666666805744171
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 893
> Total counterfactuals added 893
> Counterfactual size 89 at rate 0.1
> Train with CF samples 9699


Global seed set to 42


304 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 304/304 [01:02<00:00,  4.88it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 17.14it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 16.88it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 16.71it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 16.64it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 16.66it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 16.67it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 16.68it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 16.65it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 304/304 [01:06<00:00,  4.54it/s, val_loss=0.143, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1520-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1520-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:03<00:00, 11.52it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9467554092407227
        F1-Macro            0.7192682027816772
       F1-Weighted          0.9458265900611877
       F1_Class 0           0.9803555011749268
       F1_Class 1           0.7512195110321045
       F1_Class 2           0.4262295067310333
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 866
> Total counterfactuals added 866
> Counterfactual size 87 at rate 0.1
> Train with CF samples 9697


Global seed set to 42


304 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 304/304 [01:13<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.68it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.48it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.34it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.29it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.32it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.34it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.36it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.35it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 304/304 [01:18<00:00,  3.87it/s, val_loss=0.124, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1520-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1520-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.04it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.960066556930542
        F1-Macro            0.7851434350013733
       F1-Weighted          0.9572291970252991
       F1_Class 0           0.9831775426864624
       F1_Class 1           0.8365384340286255
       F1_Class 2           0.5357142686843872
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 862
> Total counterfactuals added 862
> Counterfactual size 86 at rate 0.1
> Train with CF samples 9697


Global seed set to 42


304 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 304/304 [01:13<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.68it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.34it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.22it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.17it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.21it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.24it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.26it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.26it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 304/304 [01:18<00:00,  3.87it/s, val_loss=0.137, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1520-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1520-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 17.09it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9550374746322632
        F1-Macro            0.7569937109947205
       F1-Weighted          0.9518529176712036
       F1_Class 0            0.985342800617218
       F1_Class 1           0.7749999761581421
       F1_Class 2           0.5106382966041565
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 859
> Total counterfactuals added 859
> Counterfactual size 86 at rate 0.1
> Train with CF samples 9697


Global seed set to 42


304 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 304/304 [01:13<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 14.25it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 14.01it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 13.97it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 13.95it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 13.98it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 14.01it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 14.04it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:02, 14.04it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 304/304 [01:18<00:00,  3.86it/s, val_loss=1.540, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1520-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1520-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.11it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.6769359111785889
        F1-Macro            0.6192223429679871
       F1-Weighted          0.7374692559242249
       F1_Class 0           0.7802898287773132
       F1_Class 1           0.3877221345901489
       F1_Class 2           0.6896551847457886
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 880
> Total counterfactuals added 880
> Counterfactual size 176 at rate 0.2
> Train with CF samples 9786


Global seed set to 42


306 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 306/306 [01:14<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:01, 19.89it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:01, 19.29it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:01, 19.26it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:01, 19.04it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 19.09it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 19.13it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 19.16it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 19.14it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 306/306 [01:18<00:00,  3.89it/s, val_loss=0.297, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 14.19it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9384359121322632
        F1-Macro            0.6862120032310486
       F1-Weighted          0.9320634007453918
       F1_Class 0           0.9683921337127686
       F1_Class 1            0.699999988079071
       F1_Class 2           0.39024388790130615
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 893
> Total counterfactuals added 893
> Counterfactual size 179 at rate 0.2
> Train with CF samples 9789


Global seed set to 42


306 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 306/306 [01:02<00:00,  4.87it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 17.08it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 16.44it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 16.55it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 16.50it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 16.56it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 16.60it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 16.63it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 16.63it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 306/306 [01:07<00:00,  4.52it/s, val_loss=0.139, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:03<00:00, 11.56it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9567387700080872
        F1-Macro            0.7579283714294434
       F1-Weighted          0.9527521133422852
       F1_Class 0           0.9809744954109192
       F1_Class 1           0.8039215803146362
       F1_Class 2           0.4888888895511627
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 866
> Total counterfactuals added 866
> Counterfactual size 173 at rate 0.2
> Train with CF samples 9783


Global seed set to 42


306 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 306/306 [01:14<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.98it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.66it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.61it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.50it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.49it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.50it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.50it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.47it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 306/306 [01:19<00:00,  3.87it/s, val_loss=0.125, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.00it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9608985185623169
        F1-Macro            0.7877671122550964
       F1-Weighted          0.9574337005615234
       F1_Class 0           0.9827345013618469
       F1_Class 1           0.8421052694320679
       F1_Class 2           0.5384615659713745
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 862
> Total counterfactuals added 862
> Counterfactual size 172 at rate 0.2
> Train with CF samples 9783


Global seed set to 42


306 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 306/306 [01:14<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.95it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.57it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.53it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.39it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.35it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.35it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.35it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.33it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 306/306 [01:19<00:00,  3.87it/s, val_loss=0.136, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 17.04it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9550374746322632
        F1-Macro            0.8021374344825745
       F1-Weighted          0.9530735611915588
       F1_Class 0           0.9820754528045654
       F1_Class 1           0.7802690863609314
       F1_Class 2           0.6440678238868713
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 859
> Total counterfactuals added 859
> Counterfactual size 172 at rate 0.2
> Train with CF samples 9783


Global seed set to 42


306 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 306/306 [01:14<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 14.53it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 14.29it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 14.23it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 14.15it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 14.17it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 14.19it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 14.19it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:02, 14.18it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 306/306 [01:19<00:00,  3.87it/s, val_loss=1.160, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.14it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7410491108894348
        F1-Macro             0.631812572479248
       F1-Weighted          0.7885699272155762
       F1_Class 0           0.8339768052101135
       F1_Class 1           0.4424131512641907
       F1_Class 2           0.6190476417541504
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 880
> Total counterfactuals added 880
> Counterfactual size 264 at rate 0.30000000000000004
> Train with CF samples 9874


Global seed set to 42


309 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 309/309 [01:14<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:01, 20.02it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:01, 19.34it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:01, 19.04it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:01, 18.95it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 18.98it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 19.01it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 19.04it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 19.01it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 309/309 [01:19<00:00,  3.90it/s, val_loss=0.301, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1545.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1545.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 14.17it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9342762231826782
        F1-Macro            0.6703022718429565
       F1-Weighted          0.9272474050521851
       F1_Class 0           0.9670329689979553
       F1_Class 1           0.6629213690757751
       F1_Class 2            0.380952388048172
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 893
> Total counterfactuals added 893
> Counterfactual size 268 at rate 0.30000000000000004
> Train with CF samples 9878


Global seed set to 42


309 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 309/309 [01:03<00:00,  4.87it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 17.19it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 16.89it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 16.79it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 16.66it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 16.70it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 16.71it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 16.72it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 16.71it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 309/309 [01:08<00:00,  4.53it/s, val_loss=0.170, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1545-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1545-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:03<00:00, 11.54it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9517470598220825
        F1-Macro            0.6688551902770996
       F1-Weighted          0.9448814392089844
       F1_Class 0           0.9805914759635925
       F1_Class 1           0.7878788113594055
       F1_Class 2           0.2380952388048172
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 866
> Total counterfactuals added 866
> Counterfactual size 260 at rate 0.30000000000000004
> Train with CF samples 9870


Global seed set to 42


309 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 309/309 [01:14<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 16.15it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.46it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.48it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.37it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.39it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.41it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.42it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.40it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 309/309 [01:19<00:00,  3.88it/s, val_loss=0.139, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1545-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1545-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.02it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9509151577949524
        F1-Macro            0.7609304189682007
       F1-Weighted          0.9470378160476685
       F1_Class 0           0.9786245226860046
       F1_Class 1           0.7708333134651184
       F1_Class 2           0.5333333611488342
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 862
> Total counterfactuals added 862
> Counterfactual size 259 at rate 0.30000000000000004
> Train with CF samples 9870


Global seed set to 42


309 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 309/309 [01:14<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.74it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.09it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.17it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.09it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.14it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.14it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.13it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.13it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 309/309 [01:19<00:00,  3.87it/s, val_loss=0.144, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1545-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1545-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 17.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9583680033683777
        F1-Macro             0.757714033126831
       F1-Weighted          0.9539807438850403
       F1_Class 0           0.9868173003196716
       F1_Class 1           0.7863247990608215
       F1_Class 2                   0.5
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 859
> Total counterfactuals added 859
> Counterfactual size 258 at rate 0.30000000000000004
> Train with CF samples 9869


Global seed set to 42


309 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 309/309 [01:14<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 14.32it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 14.19it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 14.17it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 14.08it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 14.08it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 14.11it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 14.13it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:02, 14.11it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 309/309 [01:19<00:00,  3.87it/s, val_loss=1.720, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1545-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1545-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.12it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.6810990571975708
        F1-Macro            0.5828391909599304
       F1-Weighted          0.7404078841209412
       F1_Class 0            0.786620557308197
       F1_Class 1           0.38655462861061096
       F1_Class 2           0.5753424763679504
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 880
> Total counterfactuals added 880
> Counterfactual size 352 at rate 0.4
> Train with CF samples 9962


Global seed set to 42


312 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 312/312 [01:15<00:00,  4.15it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:01, 19.82it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:01, 18.72it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:01, 18.90it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:01, 18.68it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 18.79it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 18.84it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 18.92it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 18.93it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 312/312 [01:19<00:00,  3.91it/s, val_loss=0.321, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 14.20it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9359400868415833
        F1-Macro            0.6411104202270508
       F1-Weighted          0.9279229044914246
       F1_Class 0           0.9679780602455139
       F1_Class 1           0.6850828528404236
       F1_Class 2           0.2702702581882477
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 893
> Total counterfactuals added 893
> Counterfactual size 357 at rate 0.4
> Train with CF samples 9967


Global seed set to 42


312 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 312/312 [01:03<00:00,  4.88it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 17.58it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 17.00it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 16.92it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 16.68it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 16.68it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 16.70it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 16.71it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 16.68it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 312/312 [01:08<00:00,  4.52it/s, val_loss=0.148, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:03<00:00, 11.55it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9509151577949524
        F1-Macro            0.7069554328918457
       F1-Weighted          0.9471707344055176
       F1_Class 0           0.9800463914871216
       F1_Class 1           0.7878788113594055
       F1_Class 2           0.3529411852359772
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 866
> Total counterfactuals added 866
> Counterfactual size 346 at rate 0.4
> Train with CF samples 9956


Global seed set to 42


312 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 312/312 [01:15<00:00,  4.15it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.76it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.34it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.37it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.30it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.32it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.34it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.35it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.33it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 312/312 [01:20<00:00,  3.88it/s, val_loss=0.145, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.00it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9542429447174072
        F1-Macro            0.7484380006790161
       F1-Weighted          0.9488480687141418
       F1_Class 0            0.979098916053772
       F1_Class 1            0.807881772518158
       F1_Class 2           0.4583333432674408
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 862
> Total counterfactuals added 862
> Counterfactual size 345 at rate 0.4
> Train with CF samples 9956


Global seed set to 42


312 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 312/312 [01:15<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.65it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.43it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.33it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.22it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.24it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.26it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.27it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.27it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 312/312 [01:20<00:00,  3.88it/s, val_loss=0.143, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 17.06it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9558700919151306
        F1-Macro            0.7509621381759644
       F1-Weighted          0.9530024528503418
       F1_Class 0           0.9862754344940186
       F1_Class 1           0.7866109013557434
       F1_Class 2           0.47999998927116394
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 859
> Total counterfactuals added 859
> Counterfactual size 344 at rate 0.4
> Train with CF samples 9955


Global seed set to 42


312 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 312/312 [01:15<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 14.55it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 14.38it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 14.35it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 14.18it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 14.18it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 14.18it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 14.18it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:02, 14.16it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 312/312 [01:20<00:00,  3.87it/s, val_loss=1.120, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.13it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7552040219306946
        F1-Macro            0.6089696884155273
       F1-Weighted          0.7987862825393677
       F1_Class 0            0.847328245639801
       F1_Class 1           0.4462474584579468
       F1_Class 2           0.5333333611488342
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 880
> Total counterfactuals added 880
> Counterfactual size 440 at rate 0.5
> Train with CF samples 10050


Global seed set to 42


315 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 315/315 [01:16<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:01, 19.98it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:01, 19.46it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:01, 19.38it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:01, 19.19it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 19.19it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 19.21it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 19.23it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 19.20it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 315/315 [01:20<00:00,  3.91it/s, val_loss=0.285, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1575.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1575.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 14.16it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9376040101051331
        F1-Macro            0.6627895832061768
       F1-Weighted          0.9303457140922546
       F1_Class 0           0.9683921337127686
       F1_Class 1            0.695652186870575
       F1_Class 2           0.3243243098258972
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 893
> Total counterfactuals added 893
> Counterfactual size 446 at rate 0.5
> Train with CF samples 10056


Global seed set to 42


315 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 315/315 [01:04<00:00,  4.88it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 17.20it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 16.83it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 16.65it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 16.56it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 16.60it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 16.64it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 16.66it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 16.64it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 315/315 [01:09<00:00,  4.53it/s, val_loss=0.164, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1575-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1575-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:03<00:00, 11.52it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9534109830856323
        F1-Macro             0.692679762840271
       F1-Weighted          0.9466838240623474
       F1_Class 0            0.979233980178833
       F1_Class 1           0.8061224222183228
       F1_Class 2           0.2926829159259796
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 866
> Total counterfactuals added 866
> Counterfactual size 433 at rate 0.5
> Train with CF samples 10043


Global seed set to 42


314 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 314/314 [01:15<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.78it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.58it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.53it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.42it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.44it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.46it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.47it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.45it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 314/314 [01:21<00:00,  3.87it/s, val_loss=0.151, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1570.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1570.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9509151577949524
        F1-Macro            0.7581201791763306
       F1-Weighted           0.949216365814209
       F1_Class 0           0.9813258647918701
       F1_Class 1           0.7724867463111877
       F1_Class 2           0.5205479264259338
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 862
> Total counterfactuals added 862
> Counterfactual size 431 at rate 0.5
> Train with CF samples 10042


Global seed set to 42


314 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 314/314 [01:16<00:00,  4.12it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.51it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.42it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.42it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.26it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.28it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.31it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.32it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.28it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 314/314 [01:21<00:00,  3.87it/s, val_loss=0.161, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1570-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1570-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 17.06it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9542048573493958
        F1-Macro            0.7527605295181274
       F1-Weighted          0.9499297142028809
       F1_Class 0           0.9844852089881897
       F1_Class 1           0.7631579041481018
       F1_Class 2           0.5106382966041565
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 859
> Total counterfactuals added 859
> Counterfactual size 430 at rate 0.5
> Train with CF samples 10041


Global seed set to 42


314 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 314/314 [01:15<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 14.34it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 14.24it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 14.22it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 14.13it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 14.16it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 14.16it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 14.17it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:02, 14.16it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 314/314 [01:21<00:00,  3.86it/s, val_loss=1.160, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1570-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1570-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.12it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.703580379486084
        F1-Macro            0.5815643668174744
       F1-Weighted          0.7581411004066467
       F1_Class 0           0.8056657314300537
       F1_Class 1           0.4056939482688904
       F1_Class 2           0.5333333611488342
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 880
> Total counterfactuals added 880
> Counterfactual size 528 at rate 0.6
> Train with CF samples 10138


Global seed set to 42


317 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 317/317 [01:16<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:01, 19.57it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:01, 18.84it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:01, 18.57it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:01, 18.58it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 18.67it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 18.71it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 18.77it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 18.75it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 317/317 [01:21<00:00,  3.91it/s, val_loss=0.311, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 14.18it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9359400868415833
        F1-Macro            0.6753218770027161
       F1-Weighted           0.927819013595581
       F1_Class 0           0.9657690525054932
       F1_Class 1           0.6818181872367859
       F1_Class 2           0.37837839126586914
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 893
> Total counterfactuals added 893
> Counterfactual size 536 at rate 0.6
> Train with CF samples 10146


Global seed set to 42


318 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 318/318 [01:05<00:00,  4.88it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 16.96it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 16.53it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 16.58it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 16.50it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 16.52it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 16.54it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 16.56it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 16.54it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 318/318 [01:09<00:00,  4.55it/s, val_loss=0.165, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1590.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1590.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:03<00:00, 11.54it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9492512345314026
        F1-Macro            0.6745216846466064
       F1-Weighted          0.9423160552978516
       F1_Class 0           0.9783509969711304
       F1_Class 1           0.7724867463111877
       F1_Class 2           0.27272728085517883
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 866
> Total counterfactuals added 866
> Counterfactual size 520 at rate 0.6
> Train with CF samples 10130


Global seed set to 42


317 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 317/317 [01:16<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 16.15it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.78it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.68it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.53it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.52it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.52it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 15.52it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.49it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 317/317 [01:21<00:00,  3.88it/s, val_loss=0.130, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.961730420589447
        F1-Macro            0.7851978540420532
       F1-Weighted          0.9585902690887451
       F1_Class 0           0.9835757613182068
       F1_Class 1           0.8520179390907288
       F1_Class 2           0.5199999809265137
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 862
> Total counterfactuals added 862
> Counterfactual size 517 at rate 0.6
> Train with CF samples 10128


Global seed set to 42


317 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 317/317 [01:16<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.67it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.24it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.19it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.05it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.11it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.15it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.18it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.19it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 317/317 [01:21<00:00,  3.88it/s, val_loss=0.137, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 17.03it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9558700919151306
        F1-Macro            0.7798593044281006
       F1-Weighted          0.9534871578216553
       F1_Class 0           0.9848771095275879
       F1_Class 1           0.7777777910232544
       F1_Class 2           0.5769230723381042
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 859
> Total counterfactuals added 859
> Counterfactual size 515 at rate 0.6
> Train with CF samples 10126


Global seed set to 42


317 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 317/317 [01:16<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 14.48it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 14.31it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 14.10it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 14.04it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 14.07it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 14.09it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 14.11it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:02, 14.10it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 317/317 [01:21<00:00,  3.87it/s, val_loss=0.853, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.14it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.756036639213562
        F1-Macro            0.5815693140029907
       F1-Weighted          0.7981956005096436
       F1_Class 0           0.8481561541557312
       F1_Class 1           0.4521072804927826
       F1_Class 2           0.4444444477558136
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 880
> Total counterfactuals added 880
> Counterfactual size 616 at rate 0.7000000000000001
> Train with CF samples 10226


Global seed set to 42


320 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 320/320 [01:17<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:01, 19.98it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:01, 19.55it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:01, 19.46it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:01, 19.22it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 19.24it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 19.25it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 19.25it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 19.20it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 320/320 [01:21<00:00,  3.91it/s, val_loss=0.288, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1600.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1600.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 14.19it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9359400868415833
        F1-Macro            0.6572471857070923
       F1-Weighted           0.928371012210846
       F1_Class 0           0.9670631289482117
       F1_Class 1           0.6888889074325562
       F1_Class 2           0.31578946113586426
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 893
> Total counterfactuals added 893
> Counterfactual size 625 at rate 0.7000000000000001
> Train with CF samples 10235


Global seed set to 42


320 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 320/320 [01:05<00:00,  4.87it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 17.06it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 16.87it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 16.73it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 16.62it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 16.65it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 16.67it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 16.68it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 16.65it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 320/320 [01:10<00:00,  4.53it/s, val_loss=0.179, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1600-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1600-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:03<00:00, 11.51it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9475873708724976
        F1-Macro            0.6552726030349731
       F1-Weighted          0.9414264559745789
       F1_Class 0           0.9791955351829529
       F1_Class 1           0.7692307829856873
       F1_Class 2           0.21739129722118378
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 866
> Total counterfactuals added 866
> Counterfactual size 606 at rate 0.7000000000000001
> Train with CF samples 10216


Global seed set to 42


320 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 320/320 [01:17<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 16.21it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.60it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.47it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.32it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.36it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.38it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.40it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.39it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 320/320 [01:22<00:00,  3.89it/s, val_loss=0.126, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1600-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1600-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.03it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9592345952987671
        F1-Macro            0.7991689443588257
       F1-Weighted           0.955634593963623
       F1_Class 0           0.9818519949913025
       F1_Class 1           0.8118811845779419
       F1_Class 2           0.6037735939025879
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 862
> Total counterfactuals added 862
> Counterfactual size 603 at rate 0.7000000000000001
> Train with CF samples 10214


Global seed set to 42


320 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 320/320 [01:17<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.67it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.18it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.23it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.08it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.12it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.15it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.17it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.17it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 320/320 [01:22<00:00,  3.89it/s, val_loss=0.130, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1600-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1600-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 17.06it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9592006802558899
        F1-Macro            0.7934330105781555
       F1-Weighted          0.9579033255577087
       F1_Class 0           0.9871976971626282
       F1_Class 1           0.7966101765632629
       F1_Class 2           0.5964912176132202
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 859
> Total counterfactuals added 859
> Counterfactual size 601 at rate 0.7000000000000001
> Train with CF samples 10212


Global seed set to 42


320 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 320/320 [01:17<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 14.65it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 14.30it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 14.15it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 14.09it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 14.10it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 14.12it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 14.13it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:02, 14.10it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 320/320 [01:22<00:00,  3.88it/s, val_loss=1.090, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1600-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1600-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.10it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7518734335899353
        F1-Macro            0.6100748181343079
       F1-Weighted          0.7960577011108398
       F1_Class 0           0.8438864350318909
       F1_Class 1           0.4478764533996582
       F1_Class 2           0.5384615659713745
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 880
> Total counterfactuals added 880
> Counterfactual size 704 at rate 0.8
> Train with CF samples 10314


Global seed set to 42


323 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 323/323 [01:17<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:01, 20.39it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:01, 19.23it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:01, 18.96it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 19.01it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 19.07it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 19.12it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 19.12it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 323/323 [01:22<00:00,  3.92it/s, val_loss=0.286, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1615.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1615.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 14.21it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9359400868415833
        F1-Macro            0.6722021102905273
       F1-Weighted           0.928352952003479
       F1_Class 0            0.966620922088623
       F1_Class 1           0.6815642714500427
       F1_Class 2           0.3684210479259491
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 893
> Total counterfactuals added 893
> Counterfactual size 714 at rate 0.8
> Train with CF samples 10324


Global seed set to 42


323 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 323/323 [01:06<00:00,  4.89it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 17.07it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 16.68it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 16.50it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 16.44it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 16.51it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 16.56it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 16.60it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 16.60it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 323/323 [01:11<00:00,  4.55it/s, val_loss=0.139, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1615-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1615-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:03<00:00, 11.52it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9542429447174072
        F1-Macro            0.6753612756729126
       F1-Weighted          0.9457704424858093
       F1_Class 0           0.9791955351829529
       F1_Class 1           0.8115941882133484
       F1_Class 2           0.23529411852359772
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 866
> Total counterfactuals added 866
> Counterfactual size 693 at rate 0.8
> Train with CF samples 10303


Global seed set to 42


322 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 322/322 [01:17<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.92it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.47it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.48it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.34it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.36it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.40it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.41it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.40it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 322/322 [01:22<00:00,  3.88it/s, val_loss=0.128, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.00it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9559068083763123
        F1-Macro            0.7862495183944702
       F1-Weighted          0.9527261257171631
       F1_Class 0           0.9804469347000122
       F1_Class 1           0.8020304441452026
       F1_Class 2           0.5762711763381958
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 862
> Total counterfactuals added 862
> Counterfactual size 690 at rate 0.8
> Train with CF samples 10301


Global seed set to 42


322 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 322/322 [01:17<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.42it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.06it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.14it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.10it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.15it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.19it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.22it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.21it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 322/322 [01:22<00:00,  3.88it/s, val_loss=0.124, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 17.14it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9575353860855103
        F1-Macro            0.7934548854827881
       F1-Weighted          0.9557003974914551
       F1_Class 0            0.985342800617218
       F1_Class 1           0.7878788113594055
       F1_Class 2           0.6071428656578064
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 859
> Total counterfactuals added 859
> Counterfactual size 687 at rate 0.8
> Train with CF samples 10298


Global seed set to 42


322 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 322/322 [01:17<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 14.53it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 14.22it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 14.23it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 14.12it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 14.13it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 14.12it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 14.13it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:02, 14.12it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 322/322 [01:23<00:00,  3.87it/s, val_loss=1.400, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.10it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7094088196754456
        F1-Macro            0.5977685451507568
       F1-Weighted          0.7627514004707336
       F1_Class 0           0.8085827231407166
       F1_Class 1           0.41868510842323303
       F1_Class 2           0.5660377144813538
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 880
> Total counterfactuals added 880
> Counterfactual size 792 at rate 0.9
> Train with CF samples 10402


Global seed set to 42


326 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 326/326 [01:18<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:01, 20.35it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:01, 19.19it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:01, 19.19it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:01, 19.07it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 19.11it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 19.14it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 19.15it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 19.11it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 326/326 [01:23<00:00,  3.91it/s, val_loss=0.310, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1630.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1630.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 14.17it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9351081252098083
        F1-Macro            0.6544593572616577
       F1-Weighted          0.9263767004013062
       F1_Class 0            0.96712327003479
       F1_Class 1           0.6629213690757751
       F1_Class 2           0.3333333432674408
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 893
> Total counterfactuals added 893
> Counterfactual size 804 at rate 0.9
> Train with CF samples 10414


Global seed set to 42


326 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 326/326 [01:13<00:00,  4.43it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 17.20it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 16.82it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 16.61it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 16.55it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 16.62it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 16.65it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 16.68it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 16.67it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 326/326 [01:18<00:00,  4.15it/s, val_loss=0.229, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1630-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1630-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:03<00:00, 11.56it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9434276223182678
        F1-Macro            0.6427732110023499
       F1-Weighted          0.9331207275390625
       F1_Class 0           0.9729729890823364
       F1_Class 1            0.739130437374115
       F1_Class 2           0.21621622145175934
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 866
> Total counterfactuals added 866
> Counterfactual size 779 at rate 0.9
> Train with CF samples 10389


Global seed set to 42


325 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 325/325 [01:18<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 16.09it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.51it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.49it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.42it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.44it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.44it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.45it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.44it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 325/325 [01:23<00:00,  3.87it/s, val_loss=0.129, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1625.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1625.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 14.98it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9550748467445374
        F1-Macro            0.7721313238143921
       F1-Weighted          0.9520633220672607
       F1_Class 0            0.982292652130127
       F1_Class 1           0.7857142686843872
       F1_Class 2           0.5483871102333069
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 862
> Total counterfactuals added 862
> Counterfactual size 776 at rate 0.9
> Train with CF samples 10387


Global seed set to 42


325 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 325/325 [01:18<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.59it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.09it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.16it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.11it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.15it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.20it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.23it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.20it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 325/325 [01:23<00:00,  3.89it/s, val_loss=0.121, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1625-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1625-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 17.06it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9542048573493958
        F1-Macro            0.7709963321685791
       F1-Weighted          0.9521058201789856
       F1_Class 0           0.9838862419128418
       F1_Class 1           0.7800830006599426
       F1_Class 2           0.5490196347236633
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 859
> Total counterfactuals added 859
> Counterfactual size 773 at rate 0.9
> Train with CF samples 10384


Global seed set to 42


325 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 325/325 [01:18<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 14.75it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 14.27it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 14.23it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 14.10it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 14.10it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 14.11it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 14.12it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:02, 14.11it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 325/325 [01:23<00:00,  3.88it/s, val_loss=1.320, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1625-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1625-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.14it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.7152373194694519
        F1-Macro            0.6252526044845581
       F1-Weighted          0.7674843072891235
       F1_Class 0           0.8119369149208069
       F1_Class 1           0.4197530746459961
       F1_Class 2           0.6440678238868713
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 880
> Total counterfactuals added 880
> Counterfactual size 880 at rate 1.0
> Train with CF samples 10490


Global seed set to 42


328 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 328/328 [01:19<00:00,  4.13it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:01, 19.89it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:01, 18.85it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:01, 18.99it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:01, 18.92it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 18.99it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 19.08it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 19.08it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 328/328 [01:23<00:00,  3.92it/s, val_loss=0.356, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1640.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1640.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 14.24it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9367720484733582
        F1-Macro            0.6442223191261292
       F1-Weighted          0.9271517395973206
       F1_Class 0           0.9680948257446289
       F1_Class 1           0.6704545617103577
       F1_Class 2           0.29411765933036804
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 893
> Total counterfactuals added 893
> Counterfactual size 893 at rate 1.0
> Train with CF samples 10503


Global seed set to 42


329 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 329/329 [01:14<00:00,  4.43it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 17.04it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 16.47it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 16.56it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 16.49it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:01, 16.56it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:01, 16.60it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:01, 16.61it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 16.60it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 329/329 [01:19<00:00,  4.16it/s, val_loss=0.211, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1645.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1645.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:03<00:00, 11.52it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9509151577949524
        F1-Macro            0.6369702816009521
       F1-Weighted          0.9398574233055115
       F1_Class 0           0.9756545424461365
       F1_Class 1           0.8102564215660095
       F1_Class 2                  0.125
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 866
> Total counterfactuals added 866
> Counterfactual size 866 at rate 1.0
> Train with CF samples 10476


Global seed set to 42


328 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 328/328 [01:19<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 16.14it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.77it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.50it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.38it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.40it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.41it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.42it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.39it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 328/328 [01:24<00:00,  3.90it/s, val_loss=0.117, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1640-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1640-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.04it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9567387700080872
        F1-Macro            0.7699712514877319
       F1-Weighted           0.952717125415802
       F1_Class 0           0.9813953638076782
       F1_Class 1           0.8100000023841858
       F1_Class 2           0.5185185074806213
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 862
> Total counterfactuals added 862
> Counterfactual size 862 at rate 1.0
> Train with CF samples 10473


Global seed set to 42


328 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 328/328 [01:19<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 15.38it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 15.19it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 15.18it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 15.12it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 15.14it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 15.16it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 15.17it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:01, 15.16it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 328/328 [01:24<00:00,  3.89it/s, val_loss=0.166, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1640-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1640-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 17.10it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9492089748382568
        F1-Macro             0.761913001537323
       F1-Weighted          0.9453341364860535
       F1_Class 0            0.981698751449585
       F1_Class 1           0.7222222089767456
       F1_Class 2            0.581818163394928
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 859
> Total counterfactuals added 859
> Counterfactual size 859 at rate 1.0
> Train with CF samples 10470


Global seed set to 42


328 38


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 328/328 [01:19<00:00,  4.14it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/38 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/38 [00:00<00:02, 14.44it/s][A
Validation DataLoader 0:   5%|█                  | 2/38 [00:00<00:02, 14.25it/s][A
Validation DataLoader 0:   8%|█▌                 | 3/38 [00:00<00:02, 14.22it/s][A
Validation DataLoader 0:  11%|██                 | 4/38 [00:00<00:02, 14.13it/s][A
Validation DataLoader 0:  13%|██▌                | 5/38 [00:00<00:02, 14.14it/s][A
Validation DataLoader 0:  16%|███                | 6/38 [00:00<00:02, 14.16it/s][A
Validation DataLoader 0:  18%|███▌               | 7/38 [00:00<00:02, 14.16it/s][A
Validation DataLoader 0:  21%|████               | 8/38 [00:00<00:02, 14.15it/s][A
Validation DataLoader 0:  24%|████▌          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 328/328 [01:24<00:00,  3.88it/s, val_loss=1.780, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1640-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1640-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 38/38 [00:02<00:00, 15.14it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.6494587659835815
        F1-Macro            0.5405632853507996
       F1-Weighted          0.7141664028167725
       F1_Class 0           0.7616244554519653
       F1_Class 1           0.3600654602050781
       F1_Class 2                   0.5
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Politics (11018, 4)
> Train samples 8814


Global seed set to 42


276 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 276/276 [00:55<00:00,  4.93it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 19.57it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 19.25it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 18.87it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 18.79it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 18.81it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 18.82it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 18.84it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 18.78it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 276/276 [01:00<00:00,  4.58it/s, val_loss=0.532, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v6.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v6.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 17.64it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8738657236099243
        F1-Macro            0.6391399502754211
       F1-Weighted          0.8708255290985107
       F1_Class 0           0.9428076148033142
       F1_Class 1           0.6851385235786438
       F1_Class 2           0.28947368264198303
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 8814


Global seed set to 42


276 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 276/276 [00:54<00:00,  5.09it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.72it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.38it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.39it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.25it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.28it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:02, 14.29it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.30it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.26it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 276/276 [00:59<00:00,  4.66it/s, val_loss=0.248, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v7.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v7.ckpt


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 16.36it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9292196035385132
        F1-Macro            0.7466219067573547
       F1-Weighted          0.9262385368347168
       F1_Class 0           0.9625223278999329
       F1_Class 1           0.8699360489845276
       F1_Class 2           0.40740740299224854
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 8814


Global seed set to 42


276 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 276/276 [00:55<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 16.08it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 15.63it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 15.68it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 15.53it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 15.56it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 15.58it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 15.59it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 15.58it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 276/276 [01:00<00:00,  4.56it/s, val_loss=0.200, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v8.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v8.ckpt


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 17.00it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9319419264793396
        F1-Macro            0.7311993837356567
       F1-Weighted          0.9282858371734619
       F1_Class 0           0.9595654606819153
       F1_Class 1           0.8810915946960449
       F1_Class 2           0.3529411852359772
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 8815


Global seed set to 42


276 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 276/276 [00:55<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 15.19it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.81it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.70it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.64it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.67it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 14.70it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.71it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.70it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 276/276 [01:00<00:00,  4.54it/s, val_loss=0.132, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v9.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v9.ckpt


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 18.57it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9491832852363586
        F1-Macro             0.808073103427887
       F1-Weighted           0.947672426700592
       F1_Class 0           0.9771908521652222
       F1_Class 1           0.8888888955116272
       F1_Class 2           0.5581395626068115
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 8815


Global seed set to 42


276 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 276/276 [00:55<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 17.15it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 16.51it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 16.55it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 16.48it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 16.50it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 16.52it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 16.54it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 16.48it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 276/276 [01:00<00:00,  4.56it/s, val_loss=0.854, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v10.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1380-v10.ckpt


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 19.41it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8303085565567017
        F1-Macro            0.7136809229850769
       F1-Weighted          0.8427824974060059
       F1_Class 0            0.887399435043335
       F1_Class 1           0.7253414392471313
       F1_Class 2           0.5283018946647644
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1617
> Total counterfactuals added 1617
> Counterfactual size 162 at rate 0.1
> Train with CF samples 8976


Global seed set to 42


281 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 281/281 [00:56<00:00,  4.95it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 19.68it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 19.06it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 19.09it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 18.96it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 18.98it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 19.01it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 19.02it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 281/281 [01:01<00:00,  4.60it/s, val_loss=0.610, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1405.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1405.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 17.63it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8684210777282715
        F1-Macro            0.6341460943222046
       F1-Weighted          0.8596462607383728
       F1_Class 0           0.9350057244300842
       F1_Class 1           0.6597402691841125
       F1_Class 2           0.3076923191547394
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 164 at rate 0.1
> Train with CF samples 8978


Global seed set to 42


281 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 281/281 [00:55<00:00,  5.09it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.50it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.37it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.37it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.29it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.29it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:02, 14.28it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.29it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.27it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 281/281 [01:00<00:00,  4.66it/s, val_loss=0.199, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1405-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1405-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 16.29it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9301270246505737
        F1-Macro            0.7608104348182678
       F1-Weighted          0.9301098585128784
       F1_Class 0           0.9655588865280151
       F1_Class 1           0.8724279999732971
       F1_Class 2           0.4444444477558136
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 164 at rate 0.1
> Train with CF samples 8978


Global seed set to 42


281 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 281/281 [00:56<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 16.04it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 15.91it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 15.90it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 15.76it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 15.75it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 15.78it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 15.79it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 15.78it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 281/281 [01:01<00:00,  4.56it/s, val_loss=0.177, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1405-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1405-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 17.08it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9373865723609924
        F1-Macro            0.6924677491188049
       F1-Weighted          0.9321334362030029
       F1_Class 0            0.966292142868042
       F1_Class 1           0.8888888955116272
       F1_Class 2           0.2222222238779068
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1626
> Total counterfactuals added 1626
> Counterfactual size 163 at rate 0.1
> Train with CF samples 8978


Global seed set to 42


281 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 281/281 [00:56<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 15.20it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.85it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.78it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.72it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.74it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 14.72it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.71it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.69it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 281/281 [01:01<00:00,  4.54it/s, val_loss=0.133, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1405-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1405-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 18.58it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9519056081771851
        F1-Macro            0.8394522666931152
       F1-Weighted          0.9502159357070923
       F1_Class 0           0.9773809313774109
       F1_Class 1           0.8898128867149353
       F1_Class 2           0.6511628031730652
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1647
> Total counterfactuals added 1647
> Counterfactual size 165 at rate 0.1
> Train with CF samples 8980


Global seed set to 42


281 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 281/281 [00:56<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 17.36it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 16.45it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 16.51it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 16.39it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 16.44it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 16.46it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 16.48it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 16.46it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 281/281 [01:01<00:00,  4.57it/s, val_loss=0.664, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1405-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1405-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 19.28it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8475499153137207
        F1-Macro            0.7387217283248901
       F1-Weighted          0.8572555780410767
       F1_Class 0           0.8993377685546875
       F1_Class 1            0.745398759841919
       F1_Class 2           0.5714285969734192
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1617
> Total counterfactuals added 1617
> Counterfactual size 323 at rate 0.2
> Train with CF samples 9137


Global seed set to 42


286 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 286/286 [00:57<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 19.96it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 19.20it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 19.15it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 19.02it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 19.05it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 19.02it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 286/286 [01:02<00:00,  4.59it/s, val_loss=0.612, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1430.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1430.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 17.56it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8793103694915771
        F1-Macro            0.6654160618782043
       F1-Weighted           0.866062581539154
       F1_Class 0           0.9306260347366333
       F1_Class 1           0.6972010135650635
       F1_Class 2           0.3684210479259491
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 327 at rate 0.2
> Train with CF samples 9141


Global seed set to 42


286 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 286/286 [00:56<00:00,  5.09it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.69it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.17it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.24it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.21it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.24it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:02, 14.26it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.28it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.26it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 286/286 [01:01<00:00,  4.67it/s, val_loss=0.208, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1430-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1430-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 16.31it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9301270246505737
        F1-Macro            0.7634427547454834
       F1-Weighted          0.9293668270111084
       F1_Class 0           0.9634511470794678
       F1_Class 1           0.8752642869949341
       F1_Class 2           0.4516128897666931
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 327 at rate 0.2
> Train with CF samples 9141


Global seed set to 42


286 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 286/286 [00:57<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 16.31it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 15.63it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 15.70it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 15.66it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 15.70it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 15.71it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 15.74it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 15.73it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 286/286 [01:02<00:00,  4.57it/s, val_loss=0.168, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1430-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1430-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 17.13it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9419237971305847
        F1-Macro             0.739871621131897
       F1-Weighted          0.9392703771591187
       F1_Class 0           0.9682063460350037
       F1_Class 1           0.9014084339141846
       F1_Class 2           0.3499999940395355
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1626
> Total counterfactuals added 1626
> Counterfactual size 325 at rate 0.2
> Train with CF samples 9140


Global seed set to 42


286 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 286/286 [00:57<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 15.35it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.81it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.69it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.60it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.64it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 14.66it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.67it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.67it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 286/286 [01:02<00:00,  4.55it/s, val_loss=0.150, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1430-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1430-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 18.67it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9491832852363586
        F1-Macro            0.7442536354064941
       F1-Weighted          0.9446842074394226
       F1_Class 0           0.9789283275604248
       F1_Class 1           0.8901960849761963
       F1_Class 2           0.3636363744735718
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1647
> Total counterfactuals added 1647
> Counterfactual size 329 at rate 0.2
> Train with CF samples 9144


Global seed set to 42


286 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 286/286 [00:57<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 16.97it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 16.37it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 16.48it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 16.47it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 16.52it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 16.55it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 16.57it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 16.56it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 286/286 [01:02<00:00,  4.56it/s, val_loss=0.783, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1430-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1430-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 19.30it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.840290367603302
        F1-Macro            0.6816027760505676
       F1-Weighted          0.8496932983398438
       F1_Class 0            0.896276593208313
       F1_Class 1           0.7382753491401672
       F1_Class 2           0.41025641560554504
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1617
> Total counterfactuals added 1617
> Counterfactual size 485 at rate 0.30000000000000004
> Train with CF samples 9299


Global seed set to 42


291 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 291/291 [00:58<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 20.15it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 19.43it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 19.09it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 19.00it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 19.04it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 19.05it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 19.06it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 19.00it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 291/291 [01:03<00:00,  4.59it/s, val_loss=0.622, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v5.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v5.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 17.61it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8747731447219849
        F1-Macro            0.6643316149711609
       F1-Weighted          0.8628214001655579
       F1_Class 0           0.9314447641372681
       F1_Class 1           0.6785714030265808
       F1_Class 2           0.38297873735427856
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 491 at rate 0.30000000000000004
> Train with CF samples 9305


Global seed set to 42


291 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 291/291 [01:07<00:00,  4.32it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.64it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.45it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.36it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.22it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.25it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:02, 14.27it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.28it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.26it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 291/291 [01:12<00:00,  4.03it/s, val_loss=0.204, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v6.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v6.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 16.36it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9283121824264526
        F1-Macro            0.7662525773048401
       F1-Weighted          0.9281402826309204
       F1_Class 0           0.9638988971710205
       F1_Class 1            0.866108775138855
       F1_Class 2                 0.46875
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 491 at rate 0.30000000000000004
> Train with CF samples 9305


Global seed set to 42


291 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 291/291 [01:07<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 16.11it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 15.82it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 15.81it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 15.73it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 15.75it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 15.76it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 15.77it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 15.76it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 291/291 [01:11<00:00,  4.04it/s, val_loss=0.191, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v7.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v7.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 17.05it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9319419264793396
        F1-Macro            0.7520622611045837
       F1-Weighted          0.9302730560302734
       F1_Class 0           0.9644128084182739
       F1_Class 1           0.8662420511245728
       F1_Class 2           0.42553192377090454
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1626
> Total counterfactuals added 1626
> Counterfactual size 488 at rate 0.30000000000000004
> Train with CF samples 9303


Global seed set to 42


291 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 291/291 [00:59<00:00,  4.93it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.99it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.65it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.58it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.56it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.62it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 14.64it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.66it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.65it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 291/291 [01:03<00:00,  4.56it/s, val_loss=0.157, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v8.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v8.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 18.63it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9473684430122375
        F1-Macro            0.8208737969398499
       F1-Weighted          0.9475637078285217
       F1_Class 0           0.9749847650527954
       F1_Class 1           0.8918918967247009
       F1_Class 2           0.5957446694374084
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1647
> Total counterfactuals added 1647
> Counterfactual size 494 at rate 0.30000000000000004
> Train with CF samples 9309


Global seed set to 42


291 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 291/291 [01:07<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 17.37it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 16.67it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 16.52it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 16.44it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 16.47it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 16.48it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 16.47it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 16.46it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 291/291 [01:11<00:00,  4.05it/s, val_loss=0.468, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v9.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1455-v9.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 19.36it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8629764318466187
        F1-Macro            0.7222521305084229
       F1-Weighted          0.8703292608261108
       F1_Class 0            0.913128674030304
       F1_Class 1           0.7658227682113647
       F1_Class 2           0.4878048896789551
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1617
> Total counterfactuals added 1617
> Counterfactual size 647 at rate 0.4
> Train with CF samples 9461


Global seed set to 42


296 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 296/296 [00:59<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 19.81it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 19.12it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 19.16it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 18.97it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 18.99it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 18.98it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 18.96it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 18.92it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 296/296 [01:04<00:00,  4.62it/s, val_loss=0.647, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1480.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1480.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 17.58it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8620689511299133
        F1-Macro            0.6503201127052307
       F1-Weighted          0.8488394021987915
       F1_Class 0           0.9293386340141296
       F1_Class 1           0.6216216087341309
       F1_Class 2           0.4000000059604645
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 655 at rate 0.4
> Train with CF samples 9469


Global seed set to 42


296 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 296/296 [01:08<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.76it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.31it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.14it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.11it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.16it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:02, 14.19it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.21it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.21it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 296/296 [01:13<00:00,  4.04it/s, val_loss=0.209, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1480-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1480-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 16.34it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9328493475914001
        F1-Macro            0.7833461761474609
       F1-Weighted          0.9328146576881409
       F1_Class 0           0.9655588865280151
       F1_Class 1           0.8765432238578796
       F1_Class 2           0.5079365372657776
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 655 at rate 0.4
> Train with CF samples 9469


Global seed set to 42


296 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 296/296 [01:08<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 15.97it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 15.56it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 15.64it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 15.59it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 15.64it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 15.65it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 15.68it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 15.65it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 296/296 [01:13<00:00,  4.05it/s, val_loss=0.199, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1480-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1480-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 17.08it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9401088953018188
        F1-Macro             0.749481201171875
       F1-Weighted          0.9371429681777954
       F1_Class 0           0.9667673707008362
       F1_Class 1            0.892787516117096
       F1_Class 2           0.3888888955116272
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1626
> Total counterfactuals added 1626
> Counterfactual size 650 at rate 0.4
> Train with CF samples 9465


Global seed set to 42


296 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 296/296 [01:00<00:00,  4.93it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 15.09it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.79it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.65it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.61it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.64it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 14.67it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.69it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.68it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 296/296 [01:05<00:00,  4.55it/s, val_loss=0.129, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1480-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1480-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 16.64it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9537205100059509
        F1-Macro            0.8563604354858398
       F1-Weighted           0.95354163646698
       F1_Class 0            0.97780442237854
       F1_Class 1           0.8989690542221069
       F1_Class 2            0.692307710647583
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1647
> Total counterfactuals added 1647
> Counterfactual size 659 at rate 0.4
> Train with CF samples 9474


Global seed set to 42


297 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 297/297 [01:08<00:00,  4.35it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 17.13it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 16.56it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 16.40it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 16.34it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 16.39it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 16.41it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 16.45it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 16.45it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 297/297 [01:12<00:00,  4.07it/s, val_loss=0.538, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1485.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1485.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 19.27it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8539019823074341
        F1-Macro            0.7489420771598816
       F1-Weighted          0.8632718324661255
       F1_Class 0           0.9051383137702942
       F1_Class 1           0.7507787942886353
       F1_Class 2           0.5909090638160706
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1617
> Total counterfactuals added 1617
> Counterfactual size 808 at rate 0.5
> Train with CF samples 9622


Global seed set to 42


301 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 301/301 [01:01<00:00,  4.93it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 20.05it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 18.98it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 18.97it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 18.88it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 18.89it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 18.93it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 18.96it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 18.95it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 301/301 [01:05<00:00,  4.60it/s, val_loss=0.681, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v5.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v5.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 17.46it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8711434006690979
        F1-Macro            0.6430760025978088
       F1-Weighted          0.8564779162406921
       F1_Class 0           0.9274873733520508
       F1_Class 1           0.6684073209762573
       F1_Class 2           0.3333333432674408
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 818 at rate 0.5
> Train with CF samples 9632


Global seed set to 42


301 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 301/301 [01:09<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.67it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.47it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.44it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.36it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.36it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:02, 14.35it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.35it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.33it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 301/301 [01:14<00:00,  4.04it/s, val_loss=0.218, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v6.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v6.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 16.33it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.934664249420166
        F1-Macro            0.7356922626495361
       F1-Weighted           0.928297758102417
       F1_Class 0           0.9623880386352539
       F1_Class 1           0.8857142925262451
       F1_Class 2           0.3589743673801422
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 818 at rate 0.5
> Train with CF samples 9632


Global seed set to 42


301 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 301/301 [01:09<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 16.42it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 15.63it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 15.70it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 15.61it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 15.66it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 15.68it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 15.70it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 15.70it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 301/301 [01:14<00:00,  4.06it/s, val_loss=0.166, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v7.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v7.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 17.06it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9401088953018188
        F1-Macro            0.7090269327163696
       F1-Weighted          0.9334014654159546
       F1_Class 0           0.9645808935165405
       F1_Class 1           0.8958333134651184
       F1_Class 2           0.2666666805744171
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1626
> Total counterfactuals added 1626
> Counterfactual size 813 at rate 0.5
> Train with CF samples 9628


Global seed set to 42


301 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 301/301 [01:01<00:00,  4.93it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 15.11it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.64it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.67it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.59it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.63it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 14.64it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.66it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.66it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 301/301 [01:06<00:00,  4.55it/s, val_loss=0.121, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v8.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1505-v8.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 18.68it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9528130888938904
        F1-Macro            0.8315796852111816
       F1-Weighted          0.9513359665870667
       F1_Class 0           0.9785459041595459
       F1_Class 1           0.8939709067344666
       F1_Class 2           0.6222222447395325
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1647
> Total counterfactuals added 1647
> Counterfactual size 824 at rate 0.5
> Train with CF samples 9639


Global seed set to 42


302 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 302/302 [01:09<00:00,  4.35it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 16.57it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 16.48it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 16.31it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 16.31it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 16.36it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 16.34it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 16.37it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 16.36it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 302/302 [01:14<00:00,  4.07it/s, val_loss=0.565, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1510.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1510.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 19.34it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8647912740707397
        F1-Macro            0.7523934245109558
       F1-Weighted           0.873101532459259
       F1_Class 0            0.914323091506958
       F1_Class 1           0.7650793790817261
       F1_Class 2           0.5777778029441833
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1617
> Total counterfactuals added 1617
> Counterfactual size 970 at rate 0.6
> Train with CF samples 9784


Global seed set to 42


306 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 306/306 [01:02<00:00,  4.93it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 19.56it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 18.64it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 18.72it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 18.63it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 18.73it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 18.79it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 18.84it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 18.82it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 306/306 [01:06<00:00,  4.62it/s, val_loss=0.577, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530-v5.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530-v5.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 17.58it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8793103694915771
        F1-Macro            0.6740074157714844
       F1-Weighted          0.8682952523231506
       F1_Class 0           0.9322709441184998
       F1_Class 1           0.6995074152946472
       F1_Class 2           0.39024388790130615
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 982 at rate 0.6
> Train with CF samples 9796


Global seed set to 42


307 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 307/307 [01:10<00:00,  4.35it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.79it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.12it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.17it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.10it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.08it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:02, 14.09it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.10it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.12it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 307/307 [01:15<00:00,  4.05it/s, val_loss=0.217, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v5.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v5.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 16.34it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9373865723609924
        F1-Macro            0.7967128753662109
       F1-Weighted          0.9365670680999756
       F1_Class 0           0.9659781455993652
       F1_Class 1           0.8884462118148804
       F1_Class 2           0.5357142686843872
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 982 at rate 0.6
> Train with CF samples 9796


Global seed set to 42


307 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 307/307 [01:10<00:00,  4.35it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 16.48it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 16.09it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 15.98it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 15.87it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 15.84it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 15.80it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 15.75it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 15.69it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 307/307 [01:15<00:00,  4.06it/s, val_loss=0.194, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v6.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v6.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 17.09it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9419237971305847
        F1-Macro            0.7067482471466064
       F1-Weighted          0.9358015656471252
       F1_Class 0           0.9656804800033569
       F1_Class 1           0.9045643210411072
       F1_Class 2                  0.25
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1626
> Total counterfactuals added 1626
> Counterfactual size 976 at rate 0.6
> Train with CF samples 9791


Global seed set to 42


306 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 306/306 [01:02<00:00,  4.93it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.70it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.63it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.54it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.51it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.57it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 14.62it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.65it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.65it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 306/306 [01:07<00:00,  4.56it/s, val_loss=0.133, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530-v6.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1530-v6.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 18.53it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9482758641242981
        F1-Macro            0.8271970748901367
       F1-Weighted          0.9468207359313965
       F1_Class 0           0.9755516052246094
       F1_Class 1           0.8838174343109131
       F1_Class 2           0.6222222447395325
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1647
> Total counterfactuals added 1647
> Counterfactual size 988 at rate 0.6
> Train with CF samples 9803


Global seed set to 42


307 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 307/307 [01:10<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 17.08it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 16.52it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 16.31it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 16.28it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 16.32it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 16.38it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 16.40it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 16.39it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 307/307 [01:15<00:00,  4.07it/s, val_loss=0.582, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v7.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1535-v7.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 19.30it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8647912740707397
        F1-Macro            0.7129950523376465
       F1-Weighted          0.8710318207740784
       F1_Class 0           0.9145466685295105
       F1_Class 1           0.7672955989837646
       F1_Class 2           0.4571428596973419
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1617
> Total counterfactuals added 1617
> Counterfactual size 1132 at rate 0.7000000000000001
> Train with CF samples 9946


Global seed set to 42


311 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 311/311 [01:03<00:00,  4.93it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 19.70it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 19.13it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 18.83it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 18.74it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 18.80it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 18.85it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 18.87it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 18.87it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 311/311 [01:07<00:00,  4.61it/s, val_loss=0.709, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1555-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1555-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 17.56it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8620689511299133
        F1-Macro            0.6299445033073425
       F1-Weighted          0.8481907248497009
       F1_Class 0           0.9261971712112427
       F1_Class 1           0.6363636255264282
       F1_Class 2           0.3272727131843567
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 1146 at rate 0.7000000000000001
> Train with CF samples 9960


Global seed set to 42


312 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 312/312 [01:11<00:00,  4.35it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.42it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.26it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.15it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.10it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.15it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:02, 14.17it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.20it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.19it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 312/312 [01:16<00:00,  4.06it/s, val_loss=0.221, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v5.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v5.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 16.32it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9373865723609924
        F1-Macro            0.7663923501968384
       F1-Weighted          0.9349747896194458
       F1_Class 0           0.9677033424377441
       F1_Class 1           0.8870292901992798
       F1_Class 2           0.4444444477558136
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 1146 at rate 0.7000000000000001
> Train with CF samples 9960


Global seed set to 42


312 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 312/312 [01:11<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 15.94it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 15.44it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 15.48it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 15.46it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 15.51it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 15.55it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 15.58it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 15.56it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 312/312 [01:16<00:00,  4.07it/s, val_loss=0.164, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v6.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v6.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 17.08it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9455535411834717
        F1-Macro            0.7689892649650574
       F1-Weighted           0.944625735282898
       F1_Class 0            0.970095694065094
       F1_Class 1           0.9113401770591736
       F1_Class 2           0.42553192377090454
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1626
> Total counterfactuals added 1626
> Counterfactual size 1138 at rate 0.7000000000000001
> Train with CF samples 9953


Global seed set to 42


312 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 312/312 [01:03<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.99it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.75it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.68it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.55it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.60it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 14.63it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.65it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.64it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 312/312 [01:08<00:00,  4.57it/s, val_loss=0.101, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v7.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v7.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 18.61it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9519056081771851
        F1-Macro            0.8269968032836914
       F1-Weighted          0.9508991241455078
       F1_Class 0           0.9784172773361206
       F1_Class 1           0.8938775658607483
       F1_Class 2           0.6086956262588501
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1647
> Total counterfactuals added 1647
> Counterfactual size 1153 at rate 0.7000000000000001
> Train with CF samples 9968


Global seed set to 42


312 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 312/312 [01:11<00:00,  4.35it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 16.99it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 16.60it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 16.27it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 16.23it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 16.27it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 16.32it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 16.35it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 16.34it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 312/312 [01:16<00:00,  4.07it/s, val_loss=0.639, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v8.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1560-v8.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 19.38it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8702359199523926
        F1-Macro            0.7588468790054321
       F1-Weighted          0.8772158622741699
       F1_Class 0           0.9167750477790833
       F1_Class 1            0.774399995803833
       F1_Class 2           0.5853658318519592
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1617
> Total counterfactuals added 1617
> Counterfactual size 1294 at rate 0.8
> Train with CF samples 10108


Global seed set to 42


316 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 316/316 [01:12<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 19.91it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 19.10it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 19.14it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 18.99it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 19.00it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 19.04it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 19.01it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 316/316 [01:17<00:00,  4.08it/s, val_loss=0.689, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1580.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1580.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 17.51it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8566243052482605
        F1-Macro            0.6412067413330078
       F1-Weighted           0.840263307094574
       F1_Class 0           0.9247190952301025
       F1_Class 1            0.598901093006134
       F1_Class 2           0.4000000059604645
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 1310 at rate 0.8
> Train with CF samples 10124


Global seed set to 42


317 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 317/317 [01:12<00:00,  4.35it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.73it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.28it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.30it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.21it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.24it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:02, 14.26it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.27it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.26it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 317/317 [01:18<00:00,  4.06it/s, val_loss=0.210, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 16.32it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9337568283081055
        F1-Macro             0.773311972618103
       F1-Weighted          0.9305294156074524
       F1_Class 0           0.9636255502700806
       F1_Class 1           0.8763102889060974
       F1_Class 2           0.47999998927116394
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 1310 at rate 0.8
> Train with CF samples 10124


Global seed set to 42


317 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 317/317 [01:12<00:00,  4.35it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 16.07it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 15.51it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 15.63it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 15.50it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 15.51it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 15.56it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 15.59it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 15.60it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 317/317 [01:17<00:00,  4.08it/s, val_loss=0.168, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v5.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v5.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 17.06it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.938293993473053
        F1-Macro            0.7401718497276306
       F1-Weighted          0.9352169036865234
       F1_Class 0           0.9635820984840393
       F1_Class 1           0.8979591727256775
       F1_Class 2           0.3589743673801422
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1626
> Total counterfactuals added 1626
> Counterfactual size 1301 at rate 0.8
> Train with CF samples 10116


Global seed set to 42


317 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 317/317 [01:04<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 15.21it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.57it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.66it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.62it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.66it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 14.68it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.70it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.69it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 317/317 [01:09<00:00,  4.59it/s, val_loss=0.118, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v6.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v6.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 18.67it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9537205100059509
        F1-Macro            0.8590520620346069
       F1-Weighted          0.9534392356872559
       F1_Class 0           0.9784946441650391
       F1_Class 1            0.894957959651947
       F1_Class 2           0.7037037014961243
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1647
> Total counterfactuals added 1647
> Counterfactual size 1318 at rate 0.8
> Train with CF samples 10133


Global seed set to 42


317 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 317/317 [01:13<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 17.02it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 16.39it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 16.43it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 16.35it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 16.37it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 16.41it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 16.44it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 16.44it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 317/317 [01:17<00:00,  4.07it/s, val_loss=0.542, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v7.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v7.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 19.32it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8720508217811584
        F1-Macro            0.7321963310241699
       F1-Weighted          0.8776648640632629
       F1_Class 0           0.9189890027046204
       F1_Class 1           0.7775999903678894
       F1_Class 2                   0.5
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1617
> Total counterfactuals added 1617
> Counterfactual size 1455 at rate 0.9
> Train with CF samples 10269


Global seed set to 42


321 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 321/321 [01:13<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 20.12it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 19.38it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 19.29it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 19.11it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 19.08it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 19.11it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 19.12it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 19.09it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 321/321 [01:18<00:00,  4.09it/s, val_loss=0.575, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1605.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1605.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 17.62it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8720508217811584
        F1-Macro            0.6611696481704712
       F1-Weighted           0.862197756767273
       F1_Class 0           0.9294652938842773
       F1_Class 1           0.6836734414100647
       F1_Class 2           0.37037035822868347
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 1473 at rate 0.9
> Train with CF samples 10287


Global seed set to 42


322 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 322/322 [01:14<00:00,  4.35it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.81it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.33it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.24it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.16it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.19it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:02, 14.19it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.19it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.18it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 322/322 [01:19<00:00,  4.07it/s, val_loss=0.240, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 16.32it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9283121824264526
        F1-Macro             0.742748498916626
       F1-Weighted          0.9263178706169128
       F1_Class 0            0.962207555770874
       F1_Class 1           0.8731808662414551
       F1_Class 2           0.3928571343421936
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 1473 at rate 0.9
> Train with CF samples 10287


Global seed set to 42


322 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 322/322 [01:14<00:00,  4.35it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 16.09it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 15.59it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 15.56it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 15.53it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 15.59it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 15.64it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 15.65it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 15.64it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 322/322 [01:18<00:00,  4.08it/s, val_loss=0.157, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 17.10it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.950090765953064
        F1-Macro            0.7672469019889832
       F1-Weighted          0.9474300146102905
       F1_Class 0           0.9718056321144104
       F1_Class 1           0.9196786880493164
       F1_Class 2           0.41025641560554504
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1626
> Total counterfactuals added 1626
> Counterfactual size 1463 at rate 0.9
> Train with CF samples 10278


Global seed set to 42


322 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 322/322 [01:05<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 15.32it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.99it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.94it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.82it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.82it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 14.81it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.81it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.80it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 322/322 [01:10<00:00,  4.59it/s, val_loss=0.114, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v5.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v5.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 18.64it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9473684430122375
        F1-Macro            0.8070864677429199
       F1-Weighted          0.9457088708877563
       F1_Class 0           0.9773809313774109
       F1_Class 1           0.8786610960960388
       F1_Class 2           0.5652173757553101
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1647
> Total counterfactuals added 1647
> Counterfactual size 1482 at rate 0.9
> Train with CF samples 10297


Global seed set to 42


322 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 322/322 [01:14<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 16.97it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 16.49it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 16.50it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 16.44it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 16.47it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 16.49it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 16.50it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 16.45it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 322/322 [01:18<00:00,  4.08it/s, val_loss=0.573, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v6.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v6.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 19.32it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8747731447219849
        F1-Macro            0.7453669905662537
       F1-Weighted          0.8815464973449707
       F1_Class 0           0.9222797751426697
       F1_Class 1           0.7804877758026123
       F1_Class 2           0.5333333611488342
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1617
> Total counterfactuals added 1617
> Counterfactual size 1617 at rate 1.0
> Train with CF samples 10431


Global seed set to 42


326 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 326/326 [01:15<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 20.22it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 18.88it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 19.01it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 18.94it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 18.99it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 19.07it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 19.05it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 326/326 [01:19<00:00,  4.09it/s, val_loss=0.602, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1630-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1630-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 17.64it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8711434006690979
        F1-Macro            0.6621391773223877
       F1-Weighted          0.8611125946044922
       F1_Class 0           0.9288560152053833
       F1_Class 1           0.6802030205726624
       F1_Class 2           0.37735849618911743
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 1637 at rate 1.0
> Train with CF samples 10451


Global seed set to 42


327 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 327/327 [01:15<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 14.06it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.05it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.13it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.08it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.13it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:02, 14.16it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.16it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.16it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 327/327 [01:20<00:00,  4.07it/s, val_loss=0.200, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 16.32it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9410163164138794
        F1-Macro            0.7693991661071777
       F1-Weighted          0.9383261203765869
       F1_Class 0           0.9685990214347839
       F1_Class 1           0.8995984196662903
       F1_Class 2           0.4399999976158142
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1637
> Total counterfactuals added 1637
> Counterfactual size 1637 at rate 1.0
> Train with CF samples 10451


Global seed set to 42


327 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 327/327 [01:15<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 16.51it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 15.71it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 15.75it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 15.68it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 15.70it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 15.72it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 15.73it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 15.72it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 327/327 [01:20<00:00,  4.08it/s, val_loss=0.168, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:02<00:00, 17.10it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9401088953018188
        F1-Macro            0.7575883865356445
       F1-Weighted          0.9377117156982422
       F1_Class 0           0.9656833410263062
       F1_Class 1            0.89682537317276
       F1_Class 2           0.41025641560554504
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1626
> Total counterfactuals added 1626
> Counterfactual size 1626 at rate 1.0
> Train with CF samples 10441


Global seed set to 42


327 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 327/327 [01:06<00:00,  4.95it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:02, 15.08it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:02, 14.47it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:02, 14.57it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:02, 14.49it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:02, 14.53it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 14.57it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 14.60it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 14.60it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 327/327 [01:11<00:00,  4.60it/s, val_loss=0.105, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 18.63it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9555354118347168
        F1-Macro            0.8586801290512085
       F1-Weighted          0.9542275667190552
       F1_Class 0           0.9774346947669983
       F1_Class 1            0.902953565120697
       F1_Class 2            0.695652186870575
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1647
> Total counterfactuals added 1647
> Counterfactual size 1647 at rate 1.0
> Train with CF samples 10462


Global seed set to 42


327 35


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 327/327 [01:15<00:00,  4.34it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/35 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▌                  | 1/35 [00:00<00:01, 17.12it/s][A
Validation DataLoader 0:   6%|█                  | 2/35 [00:00<00:01, 16.56it/s][A
Validation DataLoader 0:   9%|█▋                 | 3/35 [00:00<00:01, 16.36it/s][A
Validation DataLoader 0:  11%|██▏                | 4/35 [00:00<00:01, 16.28it/s][A
Validation DataLoader 0:  14%|██▋                | 5/35 [00:00<00:01, 16.36it/s][A
Validation DataLoader 0:  17%|███▎               | 6/35 [00:00<00:01, 16.41it/s][A
Validation DataLoader 0:  20%|███▊               | 7/35 [00:00<00:01, 16.45it/s][A
Validation DataLoader 0:  23%|████▎              | 8/35 [00:00<00:01, 16.41it/s][A
Validation DataLoader 0:  26%|████▉          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 327/327 [01:20<00:00,  4.08it/s, val_loss=0.485, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 35/35 [00:01<00:00, 19.33it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8666061758995056
        F1-Macro            0.7654560804367065
       F1-Weighted           0.873859703540802
       F1_Class 0           0.9134677648544312
       F1_Class 1           0.7675158977508545
       F1_Class 2           0.6153846383094788
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Sports (12306, 4)
> Train samples 9844


Global seed set to 42


308 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 308/308 [01:03<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:01, 20.01it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:01, 19.15it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:01, 18.99it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 18.92it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 18.98it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 19.03it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 19.06it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 19.06it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 308/308 [01:08<00:00,  4.50it/s, val_loss=0.329, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1540.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1540.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.20it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9203899502754211
        F1-Macro            0.6388028860092163
       F1-Weighted          0.9135203957557678
       F1_Class 0           0.9610894918441772
       F1_Class 1           0.7553191781044006
       F1_Class 2           0.20000000298023224
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 9845


Global seed set to 42


308 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 308/308 [01:02<00:00,  4.96it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.10it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 16.67it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 16.65it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 16.53it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 16.54it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 16.56it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 16.56it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 16.53it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 308/308 [01:07<00:00,  4.59it/s, val_loss=0.0914, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1540-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1540-v1.ckpt


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 13.67it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9642567038536072
        F1-Macro            0.7966409921646118
       F1-Weighted          0.9644883871078491
       F1_Class 0            0.986748218536377
       F1_Class 1           0.9142857193946838
       F1_Class 2           0.4888888895511627
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 9845


Global seed set to 42


308 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 308/308 [01:03<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.16it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.30it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.32it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.26it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.29it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.28it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.29it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.23it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 308/308 [01:08<00:00,  4.47it/s, val_loss=0.0856, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1540-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1540-v2.ckpt


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.00it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9675061106681824
        F1-Macro            0.7618280649185181
       F1-Weighted          0.9666422605514526
       F1_Class 0           0.9913132190704346
       F1_Class 1           0.9220778942108154
       F1_Class 2           0.3720930218696594
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 9845


Global seed set to 42


308 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 308/308 [01:03<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 14.61it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 14.47it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 14.28it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 14.19it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 14.21it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 14.23it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:02, 14.25it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:02, 14.23it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 308/308 [01:09<00:00,  4.45it/s, val_loss=0.0761, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1540-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1540-v3.ckpt


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.21it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9805036783218384
        F1-Macro            0.9007909893989563
       F1-Weighted           0.980593740940094
       F1_Class 0           0.9933707118034363
       F1_Class 1            0.949002206325531
       F1_Class 2           0.7599999904632568
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Train samples 9845


Global seed set to 42


308 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch 0: 100%|████████████████████████████████| 308/308 [01:04<00:00,  4.81it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.34it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.74it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.69it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 17.56it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.53it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.51it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.49it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.46it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 308/308 [01:08<00:00,  4.49it/s, val_loss=0.522, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1540-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1540-v4.ckpt


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 15.02it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8887083530426025
        F1-Macro            0.7144069671630859
       F1-Weighted          0.8967264890670776
       F1_Class 0           0.9360780119895935
       F1_Class 1           0.7785714268684387
       F1_Class 2           0.4285714328289032
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1477
> Total counterfactuals added 1477
> Counterfactual size 148 at rate 0.1
> Train with CF samples 9992


Global seed set to 42


313 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 313/313 [01:04<00:00,  4.83it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:01, 19.63it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:01, 19.31it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:01, 19.32it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 19.11it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 19.16it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 19.19it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 19.22it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 19.21it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 313/313 [01:09<00:00,  4.51it/s, val_loss=0.386, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1565.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1565.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.21it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9179528951644897
        F1-Macro             0.659504771232605
       F1-Weighted          0.9098943471908569
       F1_Class 0            0.957446813583374
       F1_Class 1           0.7452054619789124
       F1_Class 2           0.27586206793785095
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1455
> Total counterfactuals added 1455
> Counterfactual size 146 at rate 0.1
> Train with CF samples 9991


Global seed set to 42


313 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 313/313 [01:03<00:00,  4.96it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 16.84it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 16.68it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 16.66it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 16.53it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 16.56it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 16.57it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 16.59it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 16.56it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 313/313 [01:08<00:00,  4.60it/s, val_loss=0.130, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1565-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1565-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 13.69it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9569455981254578
        F1-Macro            0.8030791282653809
       F1-Weighted          0.9592539072036743
       F1_Class 0           0.9851205945014954
       F1_Class 1           0.8884026408195496
       F1_Class 2           0.5357142686843872
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1468
> Total counterfactuals added 1468
> Counterfactual size 147 at rate 0.1
> Train with CF samples 9992


Global seed set to 42


313 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 313/313 [01:04<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.50it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.39it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.24it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.12it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.15it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.16it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.19it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.17it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 313/313 [01:09<00:00,  4.49it/s, val_loss=0.117, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1565-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1565-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.04it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9658814072608948
        F1-Macro            0.7883796691894531
       F1-Weighted          0.9661768674850464
       F1_Class 0           0.9897750616073608
       F1_Class 1           0.9170305728912354
       F1_Class 2           0.4583333432674408
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1507
> Total counterfactuals added 1507
> Counterfactual size 151 at rate 0.1
> Train with CF samples 9996


Global seed set to 42


313 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 313/313 [01:04<00:00,  4.83it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 14.60it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 14.43it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 14.29it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 14.20it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 14.22it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 14.22it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:02, 14.25it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:02, 14.25it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 313/313 [01:10<00:00,  4.46it/s, val_loss=0.0755, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1565-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1565-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.24it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9821283221244812
        F1-Macro            0.8977763652801514
       F1-Weighted          0.9815115928649902
       F1_Class 0           0.9933909773826599
       F1_Class 1           0.9557521939277649
       F1_Class 2           0.7441860437393188
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1529
> Total counterfactuals added 1529
> Counterfactual size 153 at rate 0.1
> Train with CF samples 9998


Global seed set to 42


313 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 313/313 [01:04<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.96it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.67it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.52it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.39it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.45it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.45it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.47it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.46it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 313/313 [01:09<00:00,  4.49it/s, val_loss=0.422, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1565-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1565-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 15.02it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8992688655853271
        F1-Macro             0.740026593208313
       F1-Weighted          0.9085540771484375
       F1_Class 0           0.9395248293876648
       F1_Class 1           0.8248587846755981
       F1_Class 2           0.4556961953639984
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1477
> Total counterfactuals added 1477
> Counterfactual size 295 at rate 0.2
> Train with CF samples 10139


Global seed set to 42


317 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 317/317 [01:05<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:01, 19.95it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:01, 19.14it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:01, 18.96it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 18.87it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 18.92it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 18.96it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 18.99it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 19.00it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 317/317 [01:10<00:00,  4.49it/s, val_loss=0.346, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v8.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v8.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.22it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9236392974853516
        F1-Macro            0.6465892791748047
       F1-Weighted          0.9168554544448853
       F1_Class 0           0.9615945816040039
       F1_Class 1            0.771276593208313
       F1_Class 2           0.2068965584039688
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1455
> Total counterfactuals added 1455
> Counterfactual size 291 at rate 0.2
> Train with CF samples 10136


Global seed set to 42


317 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 317/317 [01:04<00:00,  4.95it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.19it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 16.88it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 16.58it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 16.47it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 16.50it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 16.54it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 16.55it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 16.53it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 317/317 [01:09<00:00,  4.59it/s, val_loss=0.119, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v9.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v9.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 13.67it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9601949453353882
        F1-Macro            0.7828621864318848
       F1-Weighted          0.9597928524017334
       F1_Class 0           0.9840943813323975
       F1_Class 1            0.902953565120697
       F1_Class 2           0.4615384638309479
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1468
> Total counterfactuals added 1468
> Counterfactual size 294 at rate 0.2
> Train with CF samples 10139


Global seed set to 42


317 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 317/317 [01:05<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.29it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.09it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 16.97it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 16.92it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 16.99it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.03it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.06it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.06it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 317/317 [01:10<00:00,  4.47it/s, val_loss=0.103, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v10.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1585-v10.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.04it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.96913081407547
        F1-Macro            0.8044660091400146
       F1-Weighted          0.9685253500938416
       F1_Class 0           0.9903209209442139
       F1_Class 1           0.9230769276618958
       F1_Class 2                   0.5
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1507
> Total counterfactuals added 1507
> Counterfactual size 301 at rate 0.2
> Train with CF samples 10146


Global seed set to 42


318 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 318/318 [01:05<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 14.71it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 14.32it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 14.35it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 14.27it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 14.29it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 14.32it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:02, 14.33it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:02, 14.31it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 318/318 [01:11<00:00,  4.47it/s, val_loss=0.077, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1590-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1590-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.24it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9772542715072632
        F1-Macro            0.8138749599456787
       F1-Weighted           0.974577009677887
       F1_Class 0           0.9923973679542542
       F1_Class 1           0.9492273926734924
       F1_Class 2                   0.5
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1529
> Total counterfactuals added 1529
> Counterfactual size 306 at rate 0.2
> Train with CF samples 10151


Global seed set to 42


318 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 318/318 [01:05<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.50it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.98it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.88it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 17.68it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.66it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.66it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.64it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.58it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 318/318 [01:10<00:00,  4.49it/s, val_loss=0.639, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1590-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1590-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 15.02it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8627132177352905
        F1-Macro            0.6929728388786316
       F1-Weighted          0.8770920038223267
       F1_Class 0           0.9196035265922546
       F1_Class 1           0.7446808218955994
       F1_Class 2           0.4146341383457184
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1477
> Total counterfactuals added 1477
> Counterfactual size 443 at rate 0.30000000000000004
> Train with CF samples 10287


Global seed set to 42


322 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 322/322 [01:06<00:00,  4.83it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:01, 19.82it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:01, 19.17it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:01, 19.14it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 18.83it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 18.84it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 18.92it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 18.96it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 18.95it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 322/322 [01:11<00:00,  4.51it/s, val_loss=0.308, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v7.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v7.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.24it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9236392974853516
        F1-Macro            0.6805613040924072
       F1-Weighted          0.9179952144622803
       F1_Class 0           0.9619883298873901
       F1_Class 1           0.7671957612037659
       F1_Class 2                 0.3125
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1455
> Total counterfactuals added 1455
> Counterfactual size 437 at rate 0.30000000000000004
> Train with CF samples 10282


Global seed set to 42


322 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 322/322 [01:04<00:00,  4.96it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.31it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 16.91it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 16.84it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 16.70it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 16.68it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 16.69it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 16.68it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 16.65it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 322/322 [01:09<00:00,  4.61it/s, val_loss=0.128, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v8.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v8.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 13.68it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.965069055557251
        F1-Macro            0.7352176904678345
       F1-Weighted          0.9620931148529053
       F1_Class 0           0.9853609204292297
       F1_Class 1            0.926174521446228
       F1_Class 2           0.29411765933036804
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1468
> Total counterfactuals added 1468
> Counterfactual size 440 at rate 0.30000000000000004
> Train with CF samples 10285


Global seed set to 42


322 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 322/322 [01:06<00:00,  4.83it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.75it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.31it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.30it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.19it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.22it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.22it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.25it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.25it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 322/322 [01:11<00:00,  4.49it/s, val_loss=0.102, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v9.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v9.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.03it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9666937589645386
        F1-Macro            0.7931368350982666
       F1-Weighted          0.9668639898300171
       F1_Class 0           0.9892473220825195
       F1_Class 1           0.9220778942108154
       F1_Class 2           0.4680851101875305
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1507
> Total counterfactuals added 1507
> Counterfactual size 452 at rate 0.30000000000000004
> Train with CF samples 10297


Global seed set to 42


322 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 322/322 [01:07<00:00,  4.80it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 14.79it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 14.28it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 14.10it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 14.05it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 14.11it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 14.14it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:02, 14.17it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:02, 14.17it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 322/322 [01:12<00:00,  4.46it/s, val_loss=0.0991, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v10.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v10.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.21it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.978066623210907
        F1-Macro            0.8839057683944702
       F1-Weighted          0.9774342179298401
       F1_Class 0           0.9903797507286072
       F1_Class 1           0.9502262473106384
       F1_Class 2           0.7111111283302307
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1529
> Total counterfactuals added 1529
> Counterfactual size 459 at rate 0.30000000000000004
> Train with CF samples 10304


Global seed set to 42


322 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 322/322 [01:06<00:00,  4.81it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.43it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.65it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.65it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 17.54it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.56it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.58it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.59it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.57it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 322/322 [01:11<00:00,  4.48it/s, val_loss=0.609, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v11.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1610-v11.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 14.99it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8659626245498657
        F1-Macro            0.6981228590011597
       F1-Weighted          0.8834308385848999
       F1_Class 0           0.9213853478431702
       F1_Class 1           0.7729831337928772
       F1_Class 2           0.4000000059604645
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1477
> Total counterfactuals added 1477
> Counterfactual size 591 at rate 0.4
> Train with CF samples 10435


Global seed set to 42


327 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 327/327 [01:07<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:01, 20.43it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:01, 19.66it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:01, 19.53it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 19.36it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 19.34it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 19.34it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 19.35it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 19.31it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 327/327 [01:12<00:00,  4.52it/s, val_loss=0.465, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.19it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9106417298316956
        F1-Macro            0.5881105065345764
       F1-Weighted          0.8981415033340454
       F1_Class 0           0.9524267315864563
       F1_Class 1           0.7166666388511658
       F1_Class 2            0.095238097012043
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1455
> Total counterfactuals added 1455
> Counterfactual size 582 at rate 0.4
> Train with CF samples 10427


Global seed set to 42


326 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 326/326 [01:05<00:00,  4.95it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.00it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 16.54it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 16.43it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 16.31it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 16.36it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 16.38it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 16.40it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 16.39it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 326/326 [01:10<00:00,  4.61it/s, val_loss=0.126, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1630-v3.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1630-v3.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 13.71it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.961007297039032
        F1-Macro            0.8236872553825378
       F1-Weighted          0.9626455307006836
       F1_Class 0           0.9857723712921143
       F1_Class 1           0.8990825414657593
       F1_Class 2           0.5862069129943848
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1468
> Total counterfactuals added 1468
> Counterfactual size 587 at rate 0.4
> Train with CF samples 10432


Global seed set to 42


326 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 326/326 [01:45<00:00,  3.10it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.00it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.23it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.31it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.14it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.19it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.19it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.22it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.22it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 326/326 [01:49<00:00,  2.97it/s, val_loss=0.0795, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1630-v4.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1630-v4.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.00it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9634443521499634
        F1-Macro            0.7805036306381226
       F1-Weighted          0.9647758603096008
       F1_Class 0           0.9908163547515869
       F1_Class 1                 0.90625
       F1_Class 2           0.4444444477558136
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1507
> Total counterfactuals added 1507
> Counterfactual size 603 at rate 0.4
> Train with CF samples 10448


Global seed set to 42


327 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 327/327 [01:07<00:00,  4.83it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 14.72it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 14.48it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 14.35it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 14.18it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 14.18it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 14.18it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:02, 14.21it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:02, 14.18it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 327/327 [01:13<00:00,  4.48it/s, val_loss=0.0832, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635-v5.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635-v5.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.12it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9764419198036194
        F1-Macro            0.8369858264923096
       F1-Weighted          0.9744481444358826
       F1_Class 0           0.9924127459526062
       F1_Class 1           0.9395973086357117
       F1_Class 2           0.5789473652839661
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1529
> Total counterfactuals added 1529
> Counterfactual size 612 at rate 0.4
> Train with CF samples 10457


Global seed set to 42


327 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 327/327 [01:07<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.10it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.52it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.39it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.31it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.33it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.36it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.39it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.39it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 327/327 [01:12<00:00,  4.50it/s, val_loss=0.338, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635-v6.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1635-v6.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 15.05it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9138911366462708
        F1-Macro            0.7801835536956787
       F1-Weighted          0.9204922318458557
       F1_Class 0           0.9474812150001526
       F1_Class 1           0.8451243042945862
       F1_Class 2           0.5479452013969421
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1477
> Total counterfactuals added 1477
> Counterfactual size 738 at rate 0.5
> Train with CF samples 10582


Global seed set to 42


331 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 331/331 [01:08<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:01, 20.24it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:01, 18.99it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:01, 19.10it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 19.01it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 19.08it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 19.11it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 19.12it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 19.11it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 331/331 [01:13<00:00,  4.52it/s, val_loss=0.337, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1655.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1655.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.20it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9212023019790649
        F1-Macro            0.6037837862968445
       F1-Weighted          0.9121038317680359
       F1_Class 0           0.9592233300209045
       F1_Class 1           0.7651715278625488
       F1_Class 2           0.08695652335882187
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1455
> Total counterfactuals added 1455
> Counterfactual size 728 at rate 0.5
> Train with CF samples 10573


Global seed set to 42


331 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 331/331 [01:06<00:00,  4.96it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.06it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 16.39it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 16.46it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 16.40it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 16.45it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 16.49it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 16.51it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 16.49it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 331/331 [01:11<00:00,  4.61it/s, val_loss=0.124, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1655-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1655-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 13.65it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9528838396072388
        F1-Macro            0.6707855463027954
       F1-Weighted          0.9485746026039124
       F1_Class 0           0.9790281057357788
       F1_Class 1           0.8953974843025208
       F1_Class 2           0.13793103396892548
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1468
> Total counterfactuals added 1468
> Counterfactual size 734 at rate 0.5
> Train with CF samples 10579


Global seed set to 42


331 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 331/331 [01:46<00:00,  3.11it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.03it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.22it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.31it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.18it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.22it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.24it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.27it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.24it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 331/331 [01:51<00:00,  2.97it/s, val_loss=0.092, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1655-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1655-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.14it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9658814072608948
        F1-Macro            0.6960113048553467
       F1-Weighted          0.9616323709487915
       F1_Class 0           0.9913221001625061
       F1_Class 1            0.914893627166748
       F1_Class 2           0.1818181872367859
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1507
> Total counterfactuals added 1507
> Counterfactual size 754 at rate 0.5
> Train with CF samples 10599


Global seed set to 42


332 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 332/332 [01:08<00:00,  4.83it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 14.70it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 14.24it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 14.29it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 14.15it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 14.17it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 14.18it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:02, 14.18it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:02, 14.18it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 332/332 [01:14<00:00,  4.48it/s, val_loss=0.0743, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1660.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1660.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.19it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9813160300254822
        F1-Macro            0.8908445835113525
       F1-Weighted          0.9810788035392761
       F1_Class 0           0.9933775067329407
       F1_Class 1           0.9557521939277649
       F1_Class 2           0.7234042286872864
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1529
> Total counterfactuals added 1529
> Counterfactual size 764 at rate 0.5
> Train with CF samples 10609


Global seed set to 42


332 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 332/332 [01:08<00:00,  4.83it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.17it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.49it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.53it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.44it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.49it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.51it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.52it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.50it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 332/332 [01:13<00:00,  4.51it/s, val_loss=0.601, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1660-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1660-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 15.03it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8651502728462219
        F1-Macro            0.7075013518333435
       F1-Weighted          0.8766989707946777
       F1_Class 0           0.9177250266075134
       F1_Class 1           0.7457627058029175
       F1_Class 2           0.4590163826942444
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1477
> Total counterfactuals added 1477
> Counterfactual size 886 at rate 0.6
> Train with CF samples 10730


Global seed set to 42


336 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 336/336 [01:09<00:00,  4.83it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:01, 19.72it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:01, 19.39it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:01, 19.09it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 19.01it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 19.06it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 19.09it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 19.13it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 19.12it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 336/336 [01:14<00:00,  4.52it/s, val_loss=0.331, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1680.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1680.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.15it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9187652468681335
        F1-Macro            0.6025514602661133
       F1-Weighted          0.9079164862632751
       F1_Class 0           0.9569840431213379
       F1_Class 1           0.7506702542304993
       F1_Class 2           0.10000000149011612
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1455
> Total counterfactuals added 1455
> Counterfactual size 873 at rate 0.6
> Train with CF samples 10718


Global seed set to 42


335 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 335/335 [01:07<00:00,  4.95it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 16.95it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 16.73it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 16.56it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 16.48it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 16.51it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 16.50it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 16.51it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 16.49it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 335/335 [01:13<00:00,  4.59it/s, val_loss=0.124, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1675.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1675.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 13.69it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9618196487426758
        F1-Macro             0.693973183631897
       F1-Weighted          0.9582565426826477
       F1_Class 0           0.9858155846595764
       F1_Class 1           0.9142857193946838
       F1_Class 2           0.1818181872367859
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1468
> Total counterfactuals added 1468
> Counterfactual size 881 at rate 0.6
> Train with CF samples 10726


Global seed set to 42


336 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 336/336 [01:47<00:00,  3.12it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.64it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.40it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.38it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.26it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.28it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.30it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.30it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.28it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 336/336 [01:52<00:00,  2.98it/s, val_loss=0.113, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1680-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1680-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.10it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9626320004463196
        F1-Macro            0.7708926200866699
       F1-Weighted          0.9618399739265442
       F1_Class 0           0.9877551198005676
       F1_Class 1           0.9063180685043335
       F1_Class 2           0.41860464215278625
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1507
> Total counterfactuals added 1507
> Counterfactual size 904 at rate 0.6
> Train with CF samples 10749


Global seed set to 42


336 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 336/336 [01:09<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 14.67it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 14.39it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 14.24it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 14.20it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 14.24it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 14.26it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:02, 14.25it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:02, 14.23it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 336/336 [01:15<00:00,  4.48it/s, val_loss=0.0896, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1680-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1680-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.21it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9813160300254822
        F1-Macro            0.8814681172370911
       F1-Weighted           0.980457067489624
       F1_Class 0           0.9939209818840027
       F1_Class 1           0.9528089761734009
       F1_Class 2           0.6976743936538696
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1529
> Total counterfactuals added 1529
> Counterfactual size 917 at rate 0.6
> Train with CF samples 10762


Global seed set to 42


337 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 337/337 [01:09<00:00,  4.83it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.03it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.48it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.27it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.17it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.26it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.31it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.35it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.34it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 337/337 [01:14<00:00,  4.51it/s, val_loss=0.431, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1685.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1685.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 15.03it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8952071666717529
        F1-Macro            0.7279069423675537
       F1-Weighted          0.9079375267028809
       F1_Class 0           0.9367225766181946
       F1_Class 1           0.8388349413871765
       F1_Class 2           0.40816327929496765
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1477
> Total counterfactuals added 1477
> Counterfactual size 1034 at rate 0.7000000000000001
> Train with CF samples 10878


Global seed set to 42


340 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 340/340 [01:10<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:01, 20.06it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:01, 19.21it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:01, 19.20it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 19.12it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 19.15it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 19.17it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 19.18it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 19.18it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 340/340 [01:15<00:00,  4.52it/s, val_loss=0.325, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1700.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1700.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.17it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9203899502754211
        F1-Macro            0.6911215782165527
       F1-Weighted          0.9138709306716919
       F1_Class 0           0.9597283005714417
       F1_Class 1                  0.75
       F1_Class 2           0.3636363744735718
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1455
> Total counterfactuals added 1455
> Counterfactual size 1019 at rate 0.7000000000000001
> Train with CF samples 10864


Global seed set to 42


340 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 340/340 [01:08<00:00,  4.96it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.24it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 16.52it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 16.60it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 16.52it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 16.55it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 16.56it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 16.58it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 16.53it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 340/340 [01:13<00:00,  4.63it/s, val_loss=0.139, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1700-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1700-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 13.70it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9642567038536072
        F1-Macro            0.7358962297439575
       F1-Weighted          0.9609932899475098
       F1_Class 0            0.985375702381134
       F1_Class 1           0.9192824959754944
       F1_Class 2           0.3030303120613098
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1468
> Total counterfactuals added 1468
> Counterfactual size 1028 at rate 0.7000000000000001
> Train with CF samples 10873


Global seed set to 42


340 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 340/340 [01:49<00:00,  3.11it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.79it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.11it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.20it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.12it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.17it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.21it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.24it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.22it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 340/340 [01:54<00:00,  2.98it/s, val_loss=0.0771, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1700-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1700-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.11it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9748172163963318
        F1-Macro             0.787205696105957
       F1-Weighted          0.9725969433784485
       F1_Class 0           0.9944020509719849
       F1_Class 1           0.9347826242446899
       F1_Class 2           0.4324324429035187
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1507
> Total counterfactuals added 1507
> Counterfactual size 1055 at rate 0.7000000000000001
> Train with CF samples 10900


Global seed set to 42


341 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 341/341 [01:49<00:00,  3.11it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 14.61it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 14.31it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 14.22it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 14.16it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 14.19it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 14.22it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:02, 14.23it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:02, 14.23it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 341/341 [01:55<00:00,  2.96it/s, val_loss=0.0681, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1705.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1705.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.24it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9805036783218384
        F1-Macro            0.9074411988258362
       F1-Weighted          0.9807379245758057
       F1_Class 0           0.9923819303512573
       F1_Class 1           0.9521639943122864
       F1_Class 2           0.7777777910232544
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1529
> Total counterfactuals added 1529
> Counterfactual size 1070 at rate 0.7000000000000001
> Train with CF samples 10915


Global seed set to 42


342 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 342/342 [01:10<00:00,  4.83it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.18it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.61it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.64it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.44it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.48it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.51it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.53it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.52it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 342/342 [01:15<00:00,  4.52it/s, val_loss=0.300, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1710.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1710.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 15.07it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9049553275108337
        F1-Macro            0.7460469007492065
       F1-Weighted          0.9143206477165222
       F1_Class 0           0.9440860152244568
       F1_Class 1           0.8362234830856323
       F1_Class 2           0.45783132314682007
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1477
> Total counterfactuals added 1477
> Counterfactual size 1182 at rate 0.8
> Train with CF samples 11026


Global seed set to 42


345 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 345/345 [01:11<00:00,  4.83it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:01, 20.46it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:01, 19.57it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:01, 19.32it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 19.18it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 19.21it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 19.21it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 19.21it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 19.17it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 345/345 [01:16<00:00,  4.53it/s, val_loss=0.434, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1725.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1725.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.22it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9073923826217651
        F1-Macro            0.6324447989463806
       F1-Weighted          0.8951402902603149
       F1_Class 0           0.9501915574073792
       F1_Class 1           0.6971428394317627
       F1_Class 2                  0.25
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1455
> Total counterfactuals added 1455
> Counterfactual size 1164 at rate 0.8
> Train with CF samples 11009


Global seed set to 42


345 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 345/345 [01:09<00:00,  4.96it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.38it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 16.97it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 16.73it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 16.56it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 16.58it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 16.59it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 16.58it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 16.54it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 345/345 [01:14<00:00,  4.64it/s, val_loss=0.140, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1725-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1725-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 13.68it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9593825936317444
        F1-Macro            0.8005096912384033
       F1-Weighted          0.9596729278564453
       F1_Class 0           0.9838547110557556
       F1_Class 1           0.8976744413375854
       F1_Class 2           0.5199999809265137
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1468
> Total counterfactuals added 1468
> Counterfactual size 1174 at rate 0.8
> Train with CF samples 11019


Global seed set to 42


345 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 345/345 [01:50<00:00,  3.11it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.08it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.31it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.35it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.25it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.25it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.27it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.29it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.27it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 345/345 [01:55<00:00,  2.98it/s, val_loss=0.108, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1725-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1725-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.08it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9683184623718262
        F1-Macro            0.8067086935043335
       F1-Weighted          0.9675496220588684
       F1_Class 0           0.9898167252540588
       F1_Class 1           0.9186813235282898
       F1_Class 2           0.5116279125213623
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1507
> Total counterfactuals added 1507
> Counterfactual size 1206 at rate 0.8
> Train with CF samples 11051


Global seed set to 42


346 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 346/346 [01:51<00:00,  3.11it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 14.55it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 14.17it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 14.22it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 14.13it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 14.17it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 14.19it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:02, 14.21it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:02, 14.19it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 346/346 [01:56<00:00,  2.97it/s, val_loss=0.0755, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1730.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1730.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.26it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9813160300254822
        F1-Macro            0.9085808992385864
       F1-Weighted          0.9809049963951111
       F1_Class 0           0.9929078221321106
       F1_Class 1           0.9502262473106384
       F1_Class 2            0.782608687877655
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1529
> Total counterfactuals added 1529
> Counterfactual size 1223 at rate 0.8
> Train with CF samples 11068


Global seed set to 42


346 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 346/346 [01:11<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.40it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.87it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.73it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 17.57it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.58it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.58it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.58it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.55it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 346/346 [01:16<00:00,  4.51it/s, val_loss=0.431, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1730-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1730-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 15.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8911454081535339
        F1-Macro             0.707449734210968
       F1-Weighted          0.9064792990684509
       F1_Class 0           0.9436392784118652
       F1_Class 1           0.8048780560493469
       F1_Class 2           0.37383177876472473
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1477
> Total counterfactuals added 1477
> Counterfactual size 1329 at rate 0.9
> Train with CF samples 11173


Global seed set to 42


350 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 350/350 [01:12<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:01, 20.14it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:01, 19.44it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:01, 19.26it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 19.13it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 19.19it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 19.22it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 19.23it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 19.20it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 350/350 [01:17<00:00,  4.53it/s, val_loss=0.387, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1750.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1750.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.21it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9138911366462708
        F1-Macro            0.6715493202209473
       F1-Weighted          0.9061073064804077
       F1_Class 0           0.9561022520065308
       F1_Class 1           0.7252124547958374
       F1_Class 2           0.3333333432674408
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1455
> Total counterfactuals added 1455
> Counterfactual size 1310 at rate 0.9
> Train with CF samples 11155


Global seed set to 42


349 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 349/349 [01:10<00:00,  4.94it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.23it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 16.82it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 16.65it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 16.54it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 16.56it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 16.52it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 16.49it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 16.48it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 349/349 [01:15<00:00,  4.63it/s, val_loss=0.117, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1745.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1745.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 13.67it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9642567038536072
        F1-Macro            0.7348355054855347
       F1-Weighted          0.9612895250320435
       F1_Class 0           0.9868153929710388
       F1_Class 1           0.9146608114242554
       F1_Class 2           0.3030303120613098
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1468
> Total counterfactuals added 1468
> Counterfactual size 1321 at rate 0.9
> Train with CF samples 11166


Global seed set to 42


349 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 349/349 [01:52<00:00,  3.10it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.79it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.06it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.19it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.11it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.16it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.17it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.20it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.18it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 349/349 [01:57<00:00,  2.98it/s, val_loss=0.0889, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1745-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1745-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.06it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9764419198036194
        F1-Macro            0.7946640253067017
       F1-Weighted           0.974761962890625
       F1_Class 0           0.9959390759468079
       F1_Class 1           0.9380530714988708
       F1_Class 2           0.44999998807907104
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1507
> Total counterfactuals added 1507
> Counterfactual size 1356 at rate 0.9
> Train with CF samples 11201


Global seed set to 42


351 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 351/351 [01:52<00:00,  3.11it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 14.49it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 14.15it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 14.16it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 14.09it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 14.15it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 14.19it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:02, 14.22it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:02, 14.22it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 351/351 [01:57<00:00,  2.98it/s, val_loss=0.0815, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1755.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1755.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.26it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9805036783218384
        F1-Macro            0.9000153541564941
       F1-Weighted          0.9799757599830627
       F1_Class 0           0.9918946027755737
       F1_Class 1           0.9525959491729736
       F1_Class 2           0.7555555701255798
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1529
> Total counterfactuals added 1529
> Counterfactual size 1376 at rate 0.9
> Train with CF samples 11221


Global seed set to 42


351 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 351/351 [01:12<00:00,  4.82it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.23it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.83it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.65it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.44it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.44it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.46it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.50it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.45it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 351/351 [01:17<00:00,  4.52it/s, val_loss=0.401, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1755-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1755-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 15.00it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.8838343024253845
        F1-Macro            0.7059022188186646
       F1-Weighted          0.9003002047538757
       F1_Class 0           0.9361472129821777
       F1_Class 1           0.8031809329986572
       F1_Class 2           0.37837839126586914
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1477
> Total counterfactuals added 1477
> Counterfactual size 1477 at rate 1.0
> Train with CF samples 11321


Global seed set to 42


354 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 354/354 [01:53<00:00,  3.11it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:01, 19.69it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:01, 19.33it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:01, 19.27it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 19.11it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 19.16it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 19.19it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 19.21it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 19.20it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 354/354 [01:58<00:00,  2.98it/s, val_loss=0.428, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1770.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1770.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.17it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.913078784942627
        F1-Macro             0.657595694065094
       F1-Weighted          0.9032593965530396
       F1_Class 0           0.9538017511367798
       F1_Class 1           0.7226890921592712
       F1_Class 2           0.29629629850387573
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1455
> Total counterfactuals added 1455
> Counterfactual size 1455 at rate 1.0
> Train with CF samples 11300


Global seed set to 42


354 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 354/354 [01:11<00:00,  4.97it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 16.79it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 16.61it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 16.45it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 16.40it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 16.44it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 16.47it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 16.49it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 16.47it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 354/354 [01:16<00:00,  4.64it/s, val_loss=0.0998, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1770-v1.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1770-v1.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 13.65it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.965069055557251
        F1-Macro            0.7941056489944458
       F1-Weighted          0.9646018743515015
       F1_Class 0           0.9857723712921143
       F1_Class 1           0.9203540086746216
       F1_Class 2           0.4761904776096344
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1468
> Total counterfactuals added 1468
> Counterfactual size 1468 at rate 1.0
> Train with CF samples 11313


Global seed set to 42


354 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 354/354 [01:53<00:00,  3.11it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 18.21it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.79it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.51it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 17.39it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.40it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.41it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.39it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.36it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 354/354 [01:58<00:00,  2.98it/s, val_loss=0.0884, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1770-v2.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1770-v2.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.06it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy             0.969943106174469
        F1-Macro            0.7494276762008667
       F1-Weighted          0.9671310186386108
       F1_Class 0           0.9928717017173767
       F1_Class 1           0.9220778942108154
       F1_Class 2           0.3333333432674408
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1507
> Total counterfactuals added 1507
> Counterfactual size 1507 at rate 1.0
> Train with CF samples 11352


Global seed set to 42


355 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 355/355 [01:54<00:00,  3.11it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 14.87it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 14.52it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 14.38it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:02, 14.32it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:02, 14.34it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:02, 14.34it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:02, 14.34it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:02, 14.33it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 355/355 [01:59<00:00,  2.97it/s, val_loss=0.0844, val_f1_macro_


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1775.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1775.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 17.27it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9756295680999756
        F1-Macro            0.8661625385284424
       F1-Weighted          0.9771074056625366
       F1_Class 0           0.9938900470733643
       F1_Class 1           0.9379310607910156
       F1_Class 2           0.6666666865348816
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
> Total Tweets used to generate counterfactuals 1529
> Total counterfactuals added 1529
> Counterfactual size 1529 at rate 1.0
> Train with CF samples 11374


Global seed set to 42


356 39


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Epoch 0: 100%|████████████████████████████████| 356/356 [01:54<00:00,  3.11it/s]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|                                        | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                           | 0/39 [00:00<?, ?it/s][A
Validation DataLoader 0:   3%|▍                  | 1/39 [00:00<00:02, 17.91it/s][A
Validation DataLoader 0:   5%|▉                  | 2/39 [00:00<00:02, 17.61it/s][A
Validation DataLoader 0:   8%|█▍                 | 3/39 [00:00<00:02, 17.63it/s][A
Validation DataLoader 0:  10%|█▉                 | 4/39 [00:00<00:01, 17.52it/s][A
Validation DataLoader 0:  13%|██▍                | 5/39 [00:00<00:01, 17.54it/s][A
Validation DataLoader 0:  15%|██▉                | 6/39 [00:00<00:01, 17.55it/s][A
Validation DataLoader 0:  18%|███▍               | 7/39 [00:00<00:01, 17.55it/s][A
Validation DataLoader 0:  21%|███▉               | 8/39 [00:00<00:01, 17.52it/s][A
Validation DataLoader 0:  23%|████▍          

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|█| 356/356 [01:59<00:00,  2.98it/s, val_loss=0.377, val_f1_macro_s


You are using a CUDA device ('NVIDIA GeForce RTX 3080') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Restoring states from the checkpoint path at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1780.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/atif/work/notebook-data/chatgpt/checkpoints/epoch=4-step=1780.ckpt
  rank_zero_warn(


Testing DataLoader 0: 100%|█████████████████████| 39/39 [00:02<00:00, 15.05it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        Accuracy            0.9008935689926147
        F1-Macro            0.6873461008071899
       F1-Weighted          0.9047609567642212
       F1_Class 0           0.9421841502189636
       F1_Class 1           0.8087431788444519
       F1_Class 2           0.31111112236976624
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Time elapsed 1668.32 min


In [23]:
complete_result

[{'Gender': {'Org': [[('splt_idx', 0, 946),
     ('train', 7563),
     {'Org': [{'F1-Macro': 0.7387224435806274,
        'F1-Weighted': 0.9135377407073975,
        'F1_Class 0': 0.949999988079071,
        'F1_Class 1': 0.833734929561615,
        'F1_Class 2': 0.4324324429035187,
        'Accuracy': 0.9143763184547424}]}],
    [('splt_idx', 1, 946),
     ('train', 7563),
     {'Org': [{'F1-Macro': 0.7776308059692383,
        'F1-Weighted': 0.936324417591095,
        'F1_Class 0': 0.9613559246063232,
        'F1_Class 1': 0.8715365529060364,
        'F1_Class 2': 0.5,
        'Accuracy': 0.937632143497467}]}],
    [('splt_idx', 2, 946),
     ('train', 7563),
     {'Org': [{'F1-Macro': 0.8429104089736938,
        'F1-Weighted': 0.9529786705970764,
        'F1_Class 0': 0.9722991585731506,
        'F1_Class 1': 0.9112709760665894,
        'F1_Class 2': 0.6451612710952759,
        'Accuracy': 0.9534883499145508}]}],
    [('splt_idx', 3, 946),
     ('train', 7563),
     {'Org': [{'F1-Macro':

In [24]:
# Gender, Religion, Race, *Politics, Sports
# change MAX_EPOCH, CF, n_splits --> dummy small exp

In [25]:
# [{'Accuracy': 0.9230769276618958,
#   'F1-Macro': 0.3199999928474426,
#   'F1-Weighted': 0.8861538171768188,
#   'F1_Class 0': 0.0,
#   'F1_Class 1': 0.9599999785423279,
#   'F1_Class 2': 0.0}]