In [1]:
import os
import random
import math
import gc
import time
import copy

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset


import transformers

from tqdm.auto import tqdm
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [2]:
class CFG :
    debug_one_epoch=False
    debug_one_fold=False
    run_cv=True
    only_infer=False
    num_workers=8
    num_epochs=10
    warmup_prop=0.1
    lr=5e-5
    max_len=512
    batch_size=8
    early_stopping_rounds=5
    random_seed=42
    model_path="microsoft/deberta-v3-base" 
    pretrained_path=""
    optimizer=torch.optim.AdamW
    criterion=torch.nn.MSELoss()
    device=device
    fold=5
    
def seed_torch(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_torch(CFG.random_seed)

In [3]:
if CFG.debug_one_epoch:
    CFG.num_epochs = 1

In [4]:
train = pd.read_csv("../input/commonlitreadabilityprize/train.csv")
test = pd.read_csv("../input/commonlitreadabilityprize/test.csv")
submission = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")

In [5]:
train["target"].describe()

count    2834.000000
mean       -0.959319
std         1.033579
min        -3.676268
25%        -1.690320
50%        -0.912190
75%        -0.202540
max         1.711390
Name: target, dtype: float64

In [6]:
model = transformers.AutoModel.from_pretrained(CFG.model_path)

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

In [7]:
class CommonlitDataset(Dataset):
    def __init__(self, texts, targets, tokenizer, max_len=512, test=False):
        self.texts = texts
        if test == False:
            self.targets = targets
        self.test = test
        self.tokenizer = tokenizer
        self.max_len = max_len
        
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = self.texts[idx]
        inputs = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        
        ids = inputs["input_ids"].squeeze()
        mask = inputs["attention_mask"].squeeze()
        
        targets = torch.tensor(-100, dtype=torch.float)
        if self.test == False:
            targets = torch.tensor(self.targets[idx], dtype=torch.float)
            
        return {
            "input_ids": torch.tensor(ids, dtype=torch.long),
            "attention_mask": torch.tensor(mask, dtype=torch.long),
            "targets":targets
        }

In [8]:
tokenizer = transformers.AutoTokenizer.from_pretrained(CFG.model_path)

test_dataset = CommonlitDataset(texts=test["excerpt"].values, targets=None, tokenizer=tokenizer, max_len=CFG.max_len, test=True)
test_dataloader = DataLoader(test_dataset,batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers, pin_memory=True)

tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

In [9]:
model = transformers.AutoModelForSequenceClassification.from_pretrained(CFG.model_path, num_labels=1)
model.to(CFG.device)

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DebertaV2ForSequenceClassification(
  (deberta): DebertaV2Model(
    (embeddings): DebertaV2Embeddings(
      (word_embeddings): Embedding(128100, 768, padding_idx=0)
      (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
      (dropout): StableDropout()
    )
    (encoder): DebertaV2Encoder(
      (layer): ModuleList(
        (0-11): 12 x DebertaV2Layer(
          (attention): DebertaV2Attention(
            (self): DisentangledSelfAttention(
              (query_proj): Linear(in_features=768, out_features=768, bias=True)
              (key_proj): Linear(in_features=768, out_features=768, bias=True)
              (value_proj): Linear(in_features=768, out_features=768, bias=True)
              (pos_dropout): StableDropout()
              (dropout): StableDropout()
            )
            (output): DebertaV2SelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine

In [10]:
def train_one_epoch(model, dataloader, optimizer, scheduler, criterion):
    model.to(CFG.device)
    
    model.train()
    
    losses = []
    preds = []
    
    for data in tqdm(dataloader):
        input_ids = data["input_ids"].to(CFG.device)
        attention_mask = data["attention_mask"].to(CFG.device)
        targets = data["targets"].to(CFG.device)
        
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs.logits.squeeze(-1), targets)
        loss.backward()
        pred = outputs.logits.squeeze(-1).detach().cpu().numpy()
        optimizer.step()
        scheduler.step()
        losses.append(loss.item())
        preds.append(pred)
    
    return np.mean(losses), np.concatenate(preds)

In [11]:
def eval_one_epoch(model, dataloader, criterion):
    model.to(CFG.device)
    model.eval()

    losses = []
    preds = []
    for data in tqdm(dataloader):
        with torch.no_grad():
            input_ids = data["input_ids"].to(CFG.device)
            attention_mask = data["attention_mask"].to(CFG.device)
            targets = data["targets"].to(CFG.device)
            outputs = model(input_ids, attention_mask)
            loss = criterion(outputs.logits.squeeze(-1), targets)
            pred = outputs.logits.squeeze(-1).cpu().numpy()
            losses.append(loss.item())
            preds.append(pred)
            
    return np.mean(losses), np.concatenate(preds)

In [12]:
def run_train_cv(train, test, tokenizer):
    kf = KFold(n_splits=CFG.fold, shuffle=True, random_state=CFG.random_seed) 
    for fold, (train_idx, valid_idx) in enumerate(kf.split(train)):
        print(f"=====================fold {fold}=====================")
        train_dataset = CommonlitDataset(texts=train.loc[train_idx, "excerpt"].values, targets=train.loc[train_idx, "target"].values, tokenizer=tokenizer, max_len=CFG.max_len, test=False)
        valid_dataset = CommonlitDataset(texts=train.loc[valid_idx, "excerpt"].values, targets=train.loc[valid_idx, "target"].values, tokenizer=tokenizer, max_len=CFG.max_len, test=False)
        train_dataloader = DataLoader(train_dataset, batch_size=CFG.batch_size, shuffle=True, num_workers=CFG.num_workers, pin_memory=True)
        valid_dataloader = DataLoader(valid_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers, pin_memory=True)

        model = transformers.AutoModelForSequenceClassification.from_pretrained(CFG.model_path, num_labels=1) 

        optimizer = CFG.optimizer(model.parameters(), lr=CFG.lr)
        scheduler = transformers.get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=len(train_dataloader)*CFG.num_epochs*CFG.warmup_prop, num_training_steps=len(train_dataloader)*CFG.num_epochs)
        criterion = CFG.criterion.to(CFG.device)

        test_preds = []
        
        best_rmse = np.inf
        best_model = None
        best_preds = None
        best_epoch = 0

        for epoch in range(CFG.num_epochs):
            train_loss, train_preds = train_one_epoch(model, train_dataloader, optimizer, scheduler, criterion)
            valid_loss, valid_preds = eval_one_epoch(model, valid_dataloader, criterion)
            valid_rmse = math.sqrt(mean_squared_error(valid_preds, train.loc[valid_idx, 'target'].values))
            print(f"epoch {epoch} train_loss : {train_loss}, valid_loss : {valid_loss}, valid_rmse :{valid_rmse}")
            
            if valid_rmse < best_rmse:
                print("best rmse is updated, save model")
                best_rmse = valid_rmse
                best_model = copy.deepcopy(model)
                best_preds = valid_preds
                best_epoch = epoch
            
            if CFG.early_stopping_rounds >= 0 and best_epoch-epoch >= CFG.early_stopping_rounds:
                print("early stopping")
                break
        
        torch.save(best_model.state_dict(), f"model_{fold}.pth")
        
        test_preds.append(eval_one_epoch(best_model, test_dataloader, criterion)[1])
        
        del model
        torch.cuda.empty_cache()

        if CFG.debug_one_fold == True: 
            break

    return test_preds

In [13]:
def only_infer(test, model):
    test_dataset = CommonlitDataset(texts=test["excerpt"].values, targets=None, tokenizer=tokenizer, max_len=CFG.max_len, test=True)
    test_dataloader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers, pin_memory=True)
    test_preds = [] 
    
    for fold in range(CFG.fold):
        model = transformers.AutoModelForSequenceClassification.from_pretrained(CFG.model_path, num_labels=1)
        model.load_state_dict(torch.load(f"{CFG.pretrained_path}/model_{fold}.pth"))
        test_preds.append(eval_one_epoch(model, test_dataloader)[1])

    return test_preds

In [14]:
def main():
    if CFG.run_cv == True:
        test_preds = run_train_cv(train, test, tokenizer)
        
    elif CFG.only_infer == True:
        test_preds = only_infer(test, tokenizer)

    submission["target"] = np.mean(test_preds, axis=0)
    submission.to_csv("submission.csv", index=False)
    
if __name__ == "__main__":
    main()



Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 0 train_loss : 0.8262430070440324, valid_loss : 0.425376940361211, valid_rmse :0.6524059418261448
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 1 train_loss : 0.34348525415161546, valid_loss : 0.32015818278764335, valid_rmse :0.5662071126749415
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 2 train_loss : 0.20809667927383538, valid_loss : 0.28414780510143495, valid_rmse :0.5333771259782161
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 3 train_loss : 0.11146866709721917, valid_loss : 0.33513229182908233, valid_rmse :0.5792141617855013


  0%|          | 0/284 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Exception ignored in: Traceback (most recent call last):
<function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
Traceback (most recent call last):
      File "/usr/local/lib/python3.10/dist-packages/tor

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 4 train_loss : 0.0638042207144063, valid_loss : 0.37069836447776205, valid_rmse :0.6092057354836689


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 5 train_loss : 0.03905192093471144, valid_loss : 0.30379374104906137, valid_rmse :0.5512780210293599


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 6 train_loss : 0.023444311840879455, valid_loss : 0.34703310610542837, valid_rmse :0.5893255751983186


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 7 train_loss : 0.016249983299287483, valid_loss : 0.296102751098888, valid_rmse :0.5442945262875442


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 8 train_loss : 0.01250444471911514, valid_loss : 0.3177383777121423, valid_rmse :0.5638512340460861


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 9 train_loss : 0.010632062148430686, valid_loss : 0.31284582258110316, valid_rmse :0.5595016820946572


  0%|          | 0/1 [00:00<?, ?it/s]



Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
Exception in thread QueueFeederThreadException in thread QueueFeederThread:
Exception ignored in: :
<function _ConnectionBase.__del__ at 0x7c610093a7a0>
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 132, in __del__
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
Traceback (most recent call last):
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/

  0%|          | 0/284 [00:05<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
    reader_close()
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 0 train_loss : 0.8494975655667589, valid_loss : 0.4664479219157931, valid_rmse :0.6829950780555083
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 1 train_loss : 0.34243195534119725, valid_loss : 0.3244488797557186, valid_rmse :0.5692971640813973
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 2 train_loss : 0.19331773111767228, valid_loss : 0.37985513333073806, valid_rmse :0.6164382057653618


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 3 train_loss : 0.11115039723046438, valid_loss : 0.30752765623406625, valid_rmse :0.5544306038489206
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 4 train_loss : 0.06596624288826027, valid_loss : 0.3499875618538386, valid_rmse :0.5914718844947814


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 5 train_loss : 0.03596681629760112, valid_loss : 0.30207241735827756, valid_rmse :0.5493538614898792
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 6 train_loss : 0.022683855738084187, valid_loss : 0.3335691144243932, valid_rmse :0.5774448259927061


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 7 train_loss : 0.01583269247244125, valid_loss : 0.3407777472805809, valid_rmse :0.5836330258020161


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 8 train_loss : 0.013313345030658471, valid_loss : 0.32639019170277556, valid_rmse :0.5711117151072626


  0%|          | 0/284 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 9 train_loss : 0.010850768771619392, valid_loss : 0.32174011844564493, valid_rmse :0.5670080632271685


  0%|          | 0/1 [00:00<?, ?it/s]



Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
Exception in thread QueueFeederThread:
    reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in close
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
Exception ignored in:     self._close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 

  0%|          | 0/284 [00:05<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 0 train_loss : 0.7219195029973774, valid_loss : 0.3673794459499104, valid_rmse :0.6056797428284347
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 1 train_loss : 0.30924443761542647, valid_loss : 0.3709749378473826, valid_rmse :0.6090279036968698


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 2 train_loss : 0.16199510285473415, valid_loss : 0.3462453238980871, valid_rmse :0.5884940692325188
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 3 train_loss : 0.09411015839342185, valid_loss : 0.34280851966058706, valid_rmse :0.5854629912158908
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Traceback (most recent call last):
      File "/usr/local/lib/python3.10/dist-packages/tor

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 4 train_loss : 0.05234980121792727, valid_loss : 0.3722747343946511, valid_rmse :0.6101250372283729


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 5 train_loss : 0.031056953163679794, valid_loss : 0.38007742156025387, valid_rmse :0.6165330837391144


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 6 train_loss : 0.02019830384213005, valid_loss : 0.332364999702279, valid_rmse :0.5765177855479232
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 7 train_loss : 0.01460231179830579, valid_loss : 0.34679575778648886, valid_rmse :0.5888888360369218


  0%|          | 0/284 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Traceback (most recent call last):
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__

    self._shutdown_workers()Traceback (most recent call last):
Exception ignored in: 
<fun

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 8 train_loss : 0.01187560187601878, valid_loss : 0.3554618917930294, valid_rmse :0.5962121063337587


  0%|          | 0/284 [00:40<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


  0%|          | 0/71 [00:00<?, ?it/s]

epoch 9 train_loss : 0.01098171417137385, valid_loss : 0.35425348537908474, valid_rmse :0.5952014867657488


  0%|          | 0/1 [00:00<?, ?it/s]



Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
Exception in thread QueueFeederThread:
Traceback (most recent call last):
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
    Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in close
    reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in close
        reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in close
reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in close
    self._cl

  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 0 train_loss : 0.755363195045838, valid_loss : 0.5013016995409845, valid_rmse :0.7077494473824056
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 1 train_loss : 0.33116627676071414, valid_loss : 0.30640728278479107, valid_rmse :0.5535218660339547
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 2 train_loss : 0.1900873142491344, valid_loss : 0.4962798226887072, valid_rmse :0.7045559754383383


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 3 train_loss : 0.1019572105986709, valid_loss : 0.2857967265160151, valid_rmse :0.5347031875638123
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 4 train_loss : 0.0641023151684736, valid_loss : 0.3385174593455355, valid_rmse :0.5818971215821458


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 5 train_loss : 0.03827966838865213, valid_loss : 0.3137401767389875, valid_rmse :0.5602638794336063


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 6 train_loss : 0.022189636393384615, valid_loss : 0.33207685196063885, valid_rmse :0.5763790767038409


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 7 train_loss : 0.013914261157558838, valid_loss : 0.34381130484628003, valid_rmse :0.5864780525228814


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 8 train_loss : 0.011469823038361271, valid_loss : 0.3204684943800241, valid_rmse :0.5661775760128532


  0%|          | 0/284 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
Exception ignored in:     <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>self._shutdown_workers()

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
      File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():
if w.is_alive():  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
  File "/usr/lib/python3.10/multiprocessing/process.py", line 16

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 9 train_loss : 0.010150383334790528, valid_loss : 0.333153029457784, valid_rmse :0.5772886581532006


  0%|          | 0/1 [00:00<?, ?it/s]



Exception ignored in: <function _ConnectionBase.__del__ at 0x7c610093a7a0>
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 132, in __del__
    self._close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/284 [12:01<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 0 train_loss : 0.742502027506035, valid_loss : 0.3546655837079169, valid_rmse :0.5956565858137391
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 1 train_loss : 0.3303104869654061, valid_loss : 0.3336763986399476, valid_rmse :0.5778000132206219
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 2 train_loss : 0.1888730915420702, valid_loss : 0.26649521469650134, valid_rmse :0.516366452631016
best rmse is updated, save model


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 3 train_loss : 0.11275776404507039, valid_loss : 0.2667704810663848, valid_rmse :0.5166528548763191


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 4 train_loss : 0.06271500979930582, valid_loss : 0.44818467132642237, valid_rmse :0.6702171735977783


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 5 train_loss : 0.03809418694876974, valid_loss : 0.31814140448687783, valid_rmse :0.5645654593874737


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 6 train_loss : 0.021019714968082245, valid_loss : 0.31236978599303206, valid_rmse :0.5594552448179771


  0%|          | 0/284 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 7 train_loss : 0.014771978408557323, valid_loss : 0.3135680991788985, valid_rmse :0.5605275180342758


  0%|          | 0/284 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    
if w.is_alive():Traceback (most recent call last):

  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
      File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
assert self._parent_pid == os.getpid(), 'can only test a child process'    self._shutdown_workers()

AssertionError:   File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():can only test a child process

  File "/usr/lib/

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 8 train_loss : 0.011671071012937267, valid_loss : 0.33224347632535745, valid_rmse :0.5770129153527982


  0%|          | 0/284 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1460, in _shutdown_workers
    if w.is_alive():Exception ignored in: 
<function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive

    assert self._parent_pid == os.getpid(), 'can only test a child process'
Traceback (most recent call last):
AssertionError  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1477, in __del__
:     can only test a child processself._shutdown_workers()
Exception ignored in: Exception ignored in: 
<function _MultiProcessingDataLoaderIter.__del__ at 0x7c61006ac700>  File "/usr/local/lib/python3.10/dist-p

  0%|          | 0/71 [00:00<?, ?it/s]

epoch 9 train_loss : 0.010327416867293207, valid_loss : 0.31910894307452187, valid_rmse :0.5654521046183203


  0%|          | 0/1 [00:00<?, ?it/s]