In [2]:
import argparse

import pandas as pd
import numpy as np
import random

from tqdm.auto import tqdm

import transformers
import torch
import torchmetrics
import pytorch_lightning as pl

import wandb
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.tuner import Tuner

from sklearn.model_selection import KFold

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, inputs, targets=[]):
        self.inputs = inputs
        self.targets = targets

    # 학습 및 추론 과정에서 데이터를 1개씩 꺼내오는 곳
    def __getitem__(self, idx):
        # 정답이 있다면 else문을, 없다면 if문을 수행합니다
        if len(self.targets) == 0:
            return torch.tensor(self.inputs[idx])
        else:
            return torch.tensor(self.inputs[idx]), torch.tensor(self.targets[idx])

    # 입력하는 개수만큼 데이터를 사용합니다
    def __len__(self):
        return len(self.inputs)

In [3]:
class Dataloader(pl.LightningDataModule):
    def __init__(self, model_name, batch_size, shuffle, train_path, dev_path, test_path, predict_path):
        super().__init__()
        self.model_name = model_name
        self.batch_size = batch_size
        self.shuffle = shuffle

        self.train_path = train_path
        self.dev_path = dev_path
        self.test_path = test_path
        self.predict_path = predict_path

        self.train_dataset = None
        self.val_dataset = None
        self.test_dataset = None
        self.predict_dataset = None

        self.tokenizer = transformers.AutoTokenizer.from_pretrained(model_name, max_length=128)
        self.target_columns = ['label']
        self.delete_columns = ['id']
        self.text_columns = ['sentence_1', 'sentence_2']

    def tokenizing(self, dataframe):
        data = []
        for idx, item in tqdm(dataframe.iterrows(), desc='tokenizing', total=len(dataframe)):
            # 두 입력 문장을 [SEP] 토큰으로 이어붙여서 전처리합니다.
            text = '[SEP]'.join([item[text_column] for text_column in self.text_columns])
            outputs = self.tokenizer(text, add_special_tokens=True, padding='max_length', truncation=True)
            data.append(outputs['input_ids'])
        return data

    def preprocessing(self, data):
        # 안쓰는 컬럼을 삭제합니다.
        data = data.drop(columns=self.delete_columns)

        # 타겟 데이터가 없으면 빈 배열을 리턴합니다.
        try:
            targets = data[self.target_columns].values.tolist()
        except:
            targets = []
        # 텍스트 데이터를 전처리합니다.
        inputs = self.tokenizing(data)

        return inputs, targets

    def setup(self, stage='fit'):
        if stage == 'fit':
            # 학습 데이터와 검증 데이터셋을 호출합니다
            train_data = pd.read_csv(self.train_path)
            val_data = pd.read_csv(self.dev_path)

            # 학습데이터 준비
            train_inputs, train_targets = self.preprocessing(train_data)

            # 검증데이터 준비
            val_inputs, val_targets = self.preprocessing(val_data)

            # train 데이터만 shuffle을 적용해줍니다, 필요하다면 val, test 데이터에도 shuffle을 적용할 수 있습니다
            self.train_dataset = Dataset(train_inputs, train_targets)
            self.val_dataset = Dataset(val_inputs, val_targets)
        else:
            # 평가데이터 준비
            test_data = pd.read_csv(self.test_path)
            test_inputs, test_targets = self.preprocessing(test_data)
            self.test_dataset = Dataset(test_inputs, test_targets)

            predict_data = pd.read_csv(self.predict_path)
            predict_inputs, predict_targets = self.preprocessing(predict_data)
            self.predict_dataset = Dataset(predict_inputs, [])

    def train_dataloader(self):
        return torch.utils.data.DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=args.shuffle)

    def val_dataloader(self):
        return torch.utils.data.DataLoader(self.val_dataset, batch_size=self.batch_size)

    def test_dataloader(self):
        return torch.utils.data.DataLoader(self.test_dataset, batch_size=self.batch_size)

    def predict_dataloader(self):
        return torch.utils.data.DataLoader(self.predict_dataset, batch_size=self.batch_size)

In [4]:
class KfoldDataloader(pl.LightningDataModule):
    def __init__(self, model_name, batch_size, shuffle, k, split_seed, num_splits, train_path, dev_path, test_path, predict_path):
        super().__init__()
        self.model_name = model_name
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.k = k
        self.split_seed = split_seed
        self.num_splits = num_splits

        self.train_path = train_path
        self.dev_path = dev_path
        self.test_path = test_path
        self.predict_path = predict_path
        
        self.train_dataset = None
        self.val_dataset = None
        self.test_dataset = None
        self.predict_dataset = None

        self.tokenizer = transformers.AutoTokenizer.from_pretrained(model_name, max_length=160)
        self.target_columns = ['label']
        self.delete_columns = ['id']
        self.text_columns = ['sentence_1', 'sentence_2']

    def tokenizing(self, dataframe):
        data = []
        for idx, item in tqdm(dataframe.iterrows(), desc='tokenizing', total=len(dataframe)):
            # 두 입력 문장을 [SEP] 토큰으로 이어붙여서 전처리합니다.
            text = '[SEP]'.join([item[text_column] for text_column in self.text_columns])
            outputs = self.tokenizer(text, add_special_tokens=True, padding='max_length', truncation=True)
            data.append(outputs['input_ids'])

        return data

    def preprocessing(self, data):
        # 안쓰는 컬럼을 삭제합니다.
        data = data.drop(columns=self.delete_columns)

        # 타겟 데이터가 없으면 빈 배열을 리턴합니다.
        try:
            targets = data[self.target_columns].values.tolist()
        except:
            targets = []
        # 텍스트 데이터를 전처리합니다.
        inputs = self.tokenizing(data)

        return inputs, targets

    def setup(self, stage='fit'):
        if stage == 'fit':
            # 데이터 준비
            train_data = pd.read_csv(self.train_path)
            val_data = pd.read_csv(self.dev_path)
            total_data = pd.concat([train_data, val_data])
            total_inputs, total_targets = self.preprocessing(total_data)
            total_dataset = Dataset(total_inputs, total_targets)

            # 데이터셋 num_splits 번 fold
            kf = KFold(n_splits=self.num_splits, shuffle=self.shuffle, random_state=self.split_seed)
            all_splits = [k for k in kf.split(total_data)]

            # k번째 fold 된 데이터셋의 index 선택
            train_indexes, val_indexes = all_splits[self.k]
            train_indexes, val_indexes = train_indexes.tolist(), val_indexes.tolist()

            # fold한 index에 따라 데이터셋 분할
            self.train_dataset = [total_dataset[x] for x in train_indexes]
            self.val_dataset = [total_dataset[x] for x in val_indexes]

        else:
            # 평가데이터 준비
            self.test_dataset = self.val_dataset
            
            predict_data = pd.read_csv(self.predict_path)
            predict_inputs, predict_targets = self.preprocessing(predict_data)
            self.predict_dataset = Dataset(predict_inputs, [])

    def train_dataloader(self):
        return torch.utils.data.DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=self.shuffle)

    def val_dataloader(self):
        return torch.utils.data.DataLoader(self.val_dataset, batch_size=self.batch_size)

    def test_dataloader(self):
        return torch.utils.data.DataLoader(self.test_dataset, batch_size=self.batch_size)

    def predict_dataloader(self):
        return torch.utils.data.DataLoader(self.predict_dataset, batch_size=self.batch_size)

In [5]:
class Model(pl.LightningModule):
    def __init__(self, model_name, lr, weight_decay, loss_func):
        super().__init__()
        self.save_hyperparameters()

        self.model_name = model_name
        self.lr = lr
        self.weight_decay = weight_decay
        # self.warmup_steps = warmup_steps
        # self.total_steps = total_steps

        # 사용할 모델을 호출합니다.
        self.plm = transformers.AutoModelForSequenceClassification.from_pretrained(
            pretrained_model_name_or_path=model_name, num_labels=1)
        # Loss 계산을 위해 사용될 손실함수를 호출합니다.
        if loss_func == "MSE":
            self.loss_func = torch.nn.MSELoss()
        elif loss_func == "L1":
            self.loss_func = torch.nn.L1Loss()
        elif loss_func == "Huber":
            self.loss_func = torch.nn.HuberLoss()

    def forward(self, x):
        x = self.plm(x)['logits']

        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = self.loss_func(logits, y.float())
        self.log("train_loss", loss)

        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = self.loss_func(logits, y.float())
        self.log("val_loss", loss)

        self.log("val_pearson", torchmetrics.functional.pearson_corrcoef(logits.squeeze(), y.squeeze()))

        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)

        self.log("test_pearson", torchmetrics.functional.pearson_corrcoef(logits.squeeze(), y.squeeze()))

    def predict_step(self, batch, batch_idx):
        x = batch
        logits = self(x)

        return logits.squeeze()

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr)
        return optimizer

In [6]:
if __name__ == '__main__':
    folder_path = '/opt/ml/level1_semantictextsimilarity-nlp-14/SH'
    # seed
    seed = 42
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)
    pl.seed_everything(seed, workers=True)
    # 하이퍼 파라미터 등 각종 설정값을 입력받습니다
    # 터미널 실행 예시 : python3 run.py --batch_size=64 ...
    # 실행 시 '--batch_size=64' 같은 인자를 입력하지 않으면 default 값이 기본으로 실행됩니다
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_name', default="klue/roberta-large", type=str)
    parser.add_argument('--batch_size', default=16, type=int)
    parser.add_argument('--max_epoch', default=1, type=int)
    parser.add_argument('--shuffle', default=True)
    parser.add_argument('--k', default=1, type=int)
    parser.add_argument('--split_seed', default=seed, type=int)
    parser.add_argument('--num_splits', default=10, type=int)
    parser.add_argument('--learning_rate', default=1e-5, type=float)
    parser.add_argument('--train_path', default=folder_path+'/data/train.csv')
    parser.add_argument('--dev_path', default=folder_path+'/data/dev.csv')
    parser.add_argument('--test_path', default=folder_path+'/data/dev.csv')
    parser.add_argument('--predict_path', default=folder_path+'/data/test.csv')
    parser.add_argument('--weight_decay', default=0.01)
    # parser.add_argument('--warm_up_ratio', default=0.3)
    parser.add_argument('--loss_func', default="MSE")
    parser.add_argument('--run_name', default="_r002")
    parser.add_argument('--project_name', default="MS_230412_002_KFold")
    args = parser.parse_args(args=[])

    ### actual model train
    # wandb logger
    wandb_logger = WandbLogger(project=args.project_name, 
                               name='mMS_230412_002'+ '_k' + str(args.num_splits) + args.run_name)

    # model을 생성합니다.
    # total_steps = (9324 // args.batch_size + (9324 % args.batch_size != 0)) * args.max_epoch
    # warmup_steps = int((9324 // args.batch_size + (9324 % args.batch_size != 0)) * args.warm_up_ratio)
    model = Model(
        args.model_name,
        args.learning_rate,
        args.weight_decay,
        # warmup_steps,
        # total_steps,
        args.loss_func
    )
    
    # gpu가 없으면 accelerator='cpu', 있으면 accelerator='gpu'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    accelerator = 'gpu' if device == torch.device('cuda') else 'cpu'
    
    ######################################################################
    results = []
    # dataloader를 생성합니다. + K-fold
    for k in range(args.num_splits):
        dataloader = KfoldDataloader(args.model_name, 
                                 args.batch_size, 
                                 args.shuffle, 
                                 k,
                                 args.split_seed,
                                 args.num_splits,
                                 args.train_path, 
                                 args.dev_path, 
                                 args.test_path, 
                                 args.predict_path)
        dataloader.prepare_data()
        dataloader.setup()
        
        trainer = pl.Trainer(precision="16-mixed", 
                         accelerator='gpu', 
                         max_epochs=args.max_epoch, 
                         logger=wandb_logger,
                         log_every_n_steps=10)
        trainer.fit(model=model, datamodule=dataloader)
        score = trainer.test(model=model, datamodule=dataloader)

        results.extend(score)
    ######################################################################

    # 학습이 완료된 모델을 저장합니다.
    model_name = folder_name + f"models/MS_230412_002_{args.num_splits}fold_r002.pt"
    torch.save(model, model_name)

Global seed set to 42
[34m[1mwandb[0m: Currently logged in as: [33mtraintogpb[0m. Use [1m`wandb login --relogin`[0m to force relogin


Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.bias', 'lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifi

Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0: 100%|██████████| 556/556 [05:04<00:00,  1.83it/s, v_num=moh4]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   2%|▏         | 1/62 [00:00<00:02, 26.42it/s][A
Validation DataLoader 0:   3%|▎         | 2/62 [00:00<00:05, 10.76it/s][A
Validation DataLoader 0:   5%|▍         | 3/62 [00:00<00:06,  8.49it/s][A
Validation DataLoader 0:   6%|▋         | 4/62 [00:00<00:07,  7.69it/s][A
Validation DataLoader 0:   8%|▊         | 5/62 [00:00<00:07,  7.28it/s][A
Validation DataLoader 0:  10%|▉         | 6/62 [00:00<00:07,  7.03it/s][A
Validation DataLoader 0:  11%|█▏        | 7/62 [00:01<00:08,  6.85it/s][A
Validation DataLoader 0:  13%|█▎        | 8/62 [00:01<00:08,  6.73it/s][A
Validation DataLoader 0:  15%|█▍        | 9/62 [00:01<00:07,  6.64it/s][A
Validation DataLoader 0:  16%|█▌        | 10/62 [00:01<00:07,  6.57it/s][A
Validation DataLoader 0

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 556/556 [05:22<00:00,  1.73it/s, v_num=moh4]


tokenizing: 100%|██████████| 1100/1100 [00:00<00:00, 2775.89it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing DataLoader 0: 100%|██████████| 62/62 [00:10<00:00,  6.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_pearson          0.9229583144187927
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2900.18it/s]
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2718.72it/s]
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                             | Params
---------------------------------------------------------------
0 | plm       | RobertaForSequenceClassification | 336 M 
1 | loss_func | MSELoss                          | 0     
---------------------------------------------------------------
336 M     Trainable params
0         Non-trainable params
336 M     Total params
1,346.630 Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 22.38it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0: 100%|██████████| 556/556 [05:04<00:00,  1.82it/s, v_num=moh4]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   2%|▏         | 1/62 [00:00<00:02, 26.41it/s][A
Validation DataLoader 0:   3%|▎         | 2/62 [00:00<00:05, 10.72it/s][A
Validation DataLoader 0:   5%|▍         | 3/62 [00:00<00:06,  8.50it/s][A
Validation DataLoader 0:   6%|▋         | 4/62 [00:00<00:07,  7.68it/s][A
Validation DataLoader 0:   8%|▊         | 5/62 [00:00<00:07,  7.27it/s][A
Validation DataLoader 0:  10%|▉         | 6/62 [00:00<00:07,  7.02it/s][A
Validation DataLoader 0:  11%|█▏        | 7/62 [00:01<00:08,  6.85it/s][A
Validation DataLoader 0:  13%|█▎        | 8/62 [00:01<00:08,  6.74it/s][A
Validation DataLoader 0:  15%|█▍        | 9/62 [00:01<00:07,  6.64it/s][A
Validation DataLoader 0:  16%|█▌        | 10/62 [00:01<00:07,  6.57it/s][A
Validation DataLoader 0

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 556/556 [05:22<00:00,  1.72it/s, v_num=moh4]


tokenizing: 100%|██████████| 1100/1100 [00:00<00:00, 2610.26it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing DataLoader 0: 100%|██████████| 62/62 [00:10<00:00,  6.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_pearson          0.9504839181900024
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2894.37it/s]
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2719.29it/s]
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                             | Params
---------------------------------------------------------------
0 | plm       | RobertaForSequenceClassification | 336 M 
1 | loss_func | MSELoss                          | 0     
---------------------------------------------------------------
336 M     Trainable params
0         Non-trainable params
336 M     Total params
1,346.630 Total estimated model params size (MB)


Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 11.59it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0: 100%|██████████| 556/556 [05:04<00:00,  1.82it/s, v_num=moh4]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   2%|▏         | 1/62 [00:00<00:02, 25.94it/s][A
Validation DataLoader 0:   3%|▎         | 2/62 [00:00<00:05, 10.90it/s][A
Validation DataLoader 0:   5%|▍         | 3/62 [00:00<00:06,  8.56it/s][A
Validation DataLoader 0:   6%|▋         | 4/62 [00:00<00:07,  7.72it/s][A
Validation DataLoader 0:   8%|▊         | 5/62 [00:00<00:07,  7.30it/s][A
Validation DataLoader 0:  10%|▉         | 6/62 [00:00<00:07,  7.04it/s][A
Validation DataLoader 0:  11%|█▏        | 7/62 [00:01<00:08,  6.87it/s][A
Validation DataLoader 0:  13%|█▎        | 8/62 [00:01<00:08,  6.75it/s][A
Validation DataLoader 0:  15%|█▍        | 9/62 [00:01<00:07,  6.65it/s][A
Validation DataLoader 0:  16%|█▌        | 10/62 [00:01<00:07,  6.58it/s][A
Validation DataLoader 0

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 556/556 [05:22<00:00,  1.72it/s, v_num=moh4]


tokenizing: 100%|██████████| 1100/1100 [00:00<00:00, 2619.55it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing DataLoader 0: 100%|██████████| 62/62 [00:10<00:00,  6.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_pearson          0.9665912985801697
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2705.86it/s]
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2678.54it/s]
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                             | Params
---------------------------------------------------------------
0 | plm       | RobertaForSequenceClassification | 336 M 
1 | loss_func | MSELoss                          | 0     
---------------------------------------------------------------
336 M     Trainable params
0         Non-trainable params
336 M     Total params
1,346.630 Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 23.45it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0: 100%|██████████| 556/556 [05:04<00:00,  1.82it/s, v_num=moh4]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   2%|▏         | 1/62 [00:00<00:02, 26.70it/s][A
Validation DataLoader 0:   3%|▎         | 2/62 [00:00<00:05, 10.75it/s][A
Validation DataLoader 0:   5%|▍         | 3/62 [00:00<00:06,  8.50it/s][A
Validation DataLoader 0:   6%|▋         | 4/62 [00:00<00:07,  7.69it/s][A
Validation DataLoader 0:   8%|▊         | 5/62 [00:00<00:07,  7.28it/s][A
Validation DataLoader 0:  10%|▉         | 6/62 [00:00<00:07,  7.02it/s][A
Validation DataLoader 0:  11%|█▏        | 7/62 [00:01<00:08,  6.85it/s][A
Validation DataLoader 0:  13%|█▎        | 8/62 [00:01<00:08,  6.73it/s][A
Validation DataLoader 0:  15%|█▍        | 9/62 [00:01<00:07,  6.64it/s][A
Validation DataLoader 0:  16%|█▌        | 10/62 [00:01<00:07,  6.57it/s][A
Validation DataLoader 0

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 556/556 [05:22<00:00,  1.72it/s, v_num=moh4]


tokenizing: 100%|██████████| 1100/1100 [00:00<00:00, 2512.06it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing DataLoader 0: 100%|██████████| 62/62 [00:10<00:00,  6.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_pearson          0.9796621799468994
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2691.89it/s]
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2531.08it/s]
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                             | Params
---------------------------------------------------------------
0 | plm       | RobertaForSequenceClassification | 336 M 
1 | loss_func | MSELoss                          | 0     
---------------------------------------------------------------
336 M     Trainable params
0         Non-trainable params
336 M     Total params
1,346.630 Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 23.40it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0: 100%|██████████| 556/556 [05:04<00:00,  1.82it/s, v_num=moh4]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   2%|▏         | 1/62 [00:00<00:02, 26.75it/s][A
Validation DataLoader 0:   3%|▎         | 2/62 [00:00<00:05, 10.78it/s][A
Validation DataLoader 0:   5%|▍         | 3/62 [00:00<00:06,  8.51it/s][A
Validation DataLoader 0:   6%|▋         | 4/62 [00:00<00:07,  7.70it/s][A
Validation DataLoader 0:   8%|▊         | 5/62 [00:00<00:07,  7.29it/s][A
Validation DataLoader 0:  10%|▉         | 6/62 [00:00<00:07,  7.03it/s][A
Validation DataLoader 0:  11%|█▏        | 7/62 [00:01<00:08,  6.86it/s][A
Validation DataLoader 0:  13%|█▎        | 8/62 [00:01<00:08,  6.74it/s][A
Validation DataLoader 0:  15%|█▍        | 9/62 [00:01<00:07,  6.64it/s][A
Validation DataLoader 0:  16%|█▌        | 10/62 [00:01<00:07,  6.57it/s][A
Validation DataLoader 0

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 556/556 [05:22<00:00,  1.72it/s, v_num=moh4]


tokenizing: 100%|██████████| 1100/1100 [00:00<00:00, 2684.73it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing DataLoader 0: 100%|██████████| 62/62 [00:10<00:00,  6.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_pearson          0.9783434867858887
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2847.48it/s]
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2597.19it/s]
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                             | Params
---------------------------------------------------------------
0 | plm       | RobertaForSequenceClassification | 336 M 
1 | loss_func | MSELoss                          | 0     
---------------------------------------------------------------
336 M     Trainable params
0         Non-trainable params
336 M     Total params
1,346.630 Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00, 20.58it/s]

  rank_zero_warn(


                                                                           

  rank_zero_warn(


Epoch 0: 100%|██████████| 556/556 [05:04<00:00,  1.82it/s, v_num=moh4]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   2%|▏         | 1/62 [00:00<00:02, 27.59it/s][A
Validation DataLoader 0:   3%|▎         | 2/62 [00:00<00:05, 10.72it/s][A
Validation DataLoader 0:   5%|▍         | 3/62 [00:00<00:06,  8.49it/s][A
Validation DataLoader 0:   6%|▋         | 4/62 [00:00<00:07,  7.68it/s][A
Validation DataLoader 0:   8%|▊         | 5/62 [00:00<00:07,  7.27it/s][A
Validation DataLoader 0:  10%|▉         | 6/62 [00:00<00:07,  7.02it/s][A
Validation DataLoader 0:  11%|█▏        | 7/62 [00:01<00:08,  6.85it/s][A
Validation DataLoader 0:  13%|█▎        | 8/62 [00:01<00:08,  6.73it/s][A
Validation DataLoader 0:  15%|█▍        | 9/62 [00:01<00:07,  6.63it/s][A
Validation DataLoader 0:  16%|█▌        | 10/62 [00:01<00:07,  6.56it/s][A
Validation DataLoader 0

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 556/556 [05:25<00:00,  1.71it/s, v_num=moh4]


tokenizing: 100%|██████████| 1100/1100 [00:00<00:00, 2712.57it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing DataLoader 0: 100%|██████████| 62/62 [00:10<00:00,  6.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_pearson          0.9841296076774597
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2879.38it/s]
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2687.67it/s]
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                             | Params
---------------------------------------------------------------
0 | plm       | RobertaForSequenceClassification | 336 M 
1 | loss_func | MSELoss                          | 0     
---------------------------------------------------------------
336 M     Trainable params
0         Non-trainable params
336 M     Total params
1,346.630 Total estimated model params size (MB)


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 0: 100%|██████████| 556/556 [05:04<00:00,  1.83it/s, v_num=moh4]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   2%|▏         | 1/62 [00:00<00:02, 26.45it/s][A
Validation DataLoader 0:   3%|▎         | 2/62 [00:00<00:05, 10.81it/s][A
Validation DataLoader 0:   5%|▍         | 3/62 [00:00<00:06,  8.52it/s][A
Validation DataLoader 0:   6%|▋         | 4/62 [00:00<00:07,  7.71it/s][A
Validation DataLoader 0:   8%|▊         | 5/62 [00:00<00:07,  7.29it/s][A
Validation DataLoader 0:  10%|▉         | 6/62 [00:00<00:07,  7.03it/s][A
Validation DataLoader 0:  11%|█▏        | 7/62 [00:01<00:08,  6.86it/s][A
Validation DataLoader 0:  13%|█▎        | 8/62 [00:01<00:08,  6.74it/s][A
Validation DataLoader 0:  15%|█▍        | 9/62 [00:01<00:07,  6.65it/s][A
Validation DataLoader 0:  16%|█▌        | 10/62 [00:01<00:07,  6.57it/s][A
Validation DataLoader 0

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 556/556 [05:23<00:00,  1.72it/s, v_num=moh4]


tokenizing: 100%|██████████| 1100/1100 [00:00<00:00, 2647.49it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing DataLoader 0: 100%|██████████| 62/62 [00:10<00:00,  6.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_pearson          0.9882298111915588
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2615.55it/s]
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2718.42it/s]
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                             | Params
---------------------------------------------------------------
0 | plm       | RobertaForSequenceClassification | 336 M 
1 | loss_func | MSELoss                          | 0     
---------------------------------------------------------------
336 M     Trainable params
0         Non-trainable params
336 M     Total params
1,346.630 Total estimated model params size (MB)


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 0: 100%|██████████| 556/556 [05:05<00:00,  1.82it/s, v_num=moh4]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   2%|▏         | 1/62 [00:00<00:02, 21.71it/s][A
Validation DataLoader 0:   3%|▎         | 2/62 [00:00<00:05, 10.80it/s][A
Validation DataLoader 0:   5%|▍         | 3/62 [00:00<00:06,  8.53it/s][A
Validation DataLoader 0:   6%|▋         | 4/62 [00:00<00:07,  7.71it/s][A
Validation DataLoader 0:   8%|▊         | 5/62 [00:00<00:07,  7.29it/s][A
Validation DataLoader 0:  10%|▉         | 6/62 [00:00<00:07,  7.03it/s][A
Validation DataLoader 0:  11%|█▏        | 7/62 [00:01<00:08,  6.86it/s][A
Validation DataLoader 0:  13%|█▎        | 8/62 [00:01<00:08,  6.74it/s][A
Validation DataLoader 0:  15%|█▍        | 9/62 [00:01<00:07,  6.65it/s][A
Validation DataLoader 0:  16%|█▌        | 10/62 [00:01<00:07,  6.58it/s][A
Validation DataLoader 0

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 556/556 [05:39<00:00,  1.64it/s, v_num=moh4]


tokenizing: 100%|██████████| 1100/1100 [00:00<00:00, 2566.14it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing DataLoader 0: 100%|██████████| 62/62 [00:10<00:00,  6.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_pearson          0.9885068535804749
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2772.05it/s]
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2666.73it/s]
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                             | Params
---------------------------------------------------------------
0 | plm       | RobertaForSequenceClassification | 336 M 
1 | loss_func | MSELoss                          | 0     
---------------------------------------------------------------
336 M     Trainable params
0         Non-trainable params
336 M     Total params
1,346.630 Total estimated model params size (MB)


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 0: 100%|██████████| 556/556 [05:04<00:00,  1.83it/s, v_num=moh4]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   2%|▏         | 1/62 [00:00<00:02, 26.65it/s][A
Validation DataLoader 0:   3%|▎         | 2/62 [00:00<00:05, 10.78it/s][A
Validation DataLoader 0:   5%|▍         | 3/62 [00:00<00:06,  8.50it/s][A
Validation DataLoader 0:   6%|▋         | 4/62 [00:00<00:07,  7.70it/s][A
Validation DataLoader 0:   8%|▊         | 5/62 [00:00<00:07,  7.28it/s][A
Validation DataLoader 0:  10%|▉         | 6/62 [00:00<00:07,  7.03it/s][A
Validation DataLoader 0:  11%|█▏        | 7/62 [00:01<00:08,  6.86it/s][A
Validation DataLoader 0:  13%|█▎        | 8/62 [00:01<00:08,  6.73it/s][A
Validation DataLoader 0:  15%|█▍        | 9/62 [00:01<00:07,  6.64it/s][A
Validation DataLoader 0:  16%|█▌        | 10/62 [00:01<00:07,  6.57it/s][A
Validation DataLoader 0

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 556/556 [05:24<00:00,  1.71it/s, v_num=moh4]


tokenizing: 100%|██████████| 1100/1100 [00:00<00:00, 2616.31it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing DataLoader 0: 100%|██████████| 62/62 [00:10<00:00,  6.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_pearson          0.9922270774841309
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


tokenizing: 100%|██████████| 9874/9874 [00:03<00:00, 2807.12it/s]
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
tokenizing: 100%|██████████| 9874/9874 [00:04<00:00, 2465.81it/s]
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type                             | Params
---------------------------------------------------------------
0 | plm       | RobertaForSequenceClassification | 336 M 
1 | loss_func | MSELoss                          | 0     
---------------------------------------------------------------
336 M     Trainable params
0         Non-trainable params
336 M     Total params
1,346.630 Total estimated model params size (MB)


                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 0: 100%|██████████| 556/556 [05:04<00:00,  1.82it/s, v_num=moh4]
Validation: 0it [00:00, ?it/s][A
Validation:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/62 [00:00<?, ?it/s][A
Validation DataLoader 0:   2%|▏         | 1/62 [00:00<00:02, 26.78it/s][A
Validation DataLoader 0:   3%|▎         | 2/62 [00:00<00:05, 10.76it/s][A
Validation DataLoader 0:   5%|▍         | 3/62 [00:00<00:06,  8.51it/s][A
Validation DataLoader 0:   6%|▋         | 4/62 [00:00<00:07,  7.70it/s][A
Validation DataLoader 0:   8%|▊         | 5/62 [00:00<00:07,  7.28it/s][A
Validation DataLoader 0:  10%|▉         | 6/62 [00:00<00:07,  7.03it/s][A
Validation DataLoader 0:  11%|█▏        | 7/62 [00:01<00:08,  6.87it/s][A
Validation DataLoader 0:  13%|█▎        | 8/62 [00:01<00:08,  6.74it/s][A
Validation DataLoader 0:  15%|█▍        | 9/62 [00:01<00:07,  6.65it/s][A
Validation DataLoader 0:  16%|█▌        | 10/62 [00:01<00:07,  6.58it/s][A
Validation DataLoader 0

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 556/556 [05:24<00:00,  1.72it/s, v_num=moh4]


tokenizing: 100%|██████████| 1100/1100 [00:00<00:00, 2649.41it/s]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Testing DataLoader 0: 100%|██████████| 62/62 [00:10<00:00,  6.01it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_pearson          0.9934223294258118
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
