# NLP DL Pipeline (Inference)
# By Mohamed Eltayeb

# Import Libraries

In [1]:
# ====================================================
# Library
# ====================================================
import os
import gc
import re
import ast
import sys
import copy
import json
import time
import math
import string
import pickle
import random
import joblib
import glob
import itertools
import warnings
warnings.filterwarnings("ignore")

import scipy as sp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
plt.rcParams["figure.figsize"] = (12, 8)
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

import torch
import torch.nn as nn
from torch.nn import Parameter
from torch.optim import Optimizer
import torch.nn.init as init
import torch.nn.functional as F
from torch.autograd.function import InplaceFunction
from torch.optim import Adam, SGD, AdamW
from torch.utils.data import DataLoader, Dataset

# PyTorch Lightning imports:
import pytorch_lightning as pl
from pytorch_lightning import LightningModule, Trainer, seed_everything
from pytorch_lightning.tuner.tuning import Tuner
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping 
from pytorch_lightning.loggers import CSVLogger
from sklearn.metrics import cohen_kappa_score

import tokenizers
import transformers
print(f"tokenizers.__version__: {tokenizers.__version__}")
print(f"transformers.__version__: {transformers.__version__}")
from transformers import AutoTokenizer, AutoModel, AutoConfig
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup
from transformers import DataCollatorWithPadding
%env TOKENIZERS_PARALLELISM=False

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizers.__version__: 0.19.1
transformers.__version__: 4.41.2


2024-06-24 22:34:49.636775: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-24 22:34:49.636921: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-24 22:34:49.794167: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


env: TOKENIZERS_PARALLELISM=False


# Pipeline

## 1- Directory Settings

In [2]:
# ====================================================
# Directory settings
# ====================================================
import os

OUTPUT_DIR = './'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
    
TEST_PATH = '/kaggle/input/learning-agency-lab-automated-essay-scoring-2/test.csv'
SUBMISSION_PATH = '/kaggle/input/learning-agency-lab-automated-essay-scoring-2/sample_submission.csv'
MODELS_PATH = '/kaggle/input/aes-ckpts-2/'

## 2- Data Loading

In [3]:
# ====================================================
# Data Loading
# ====================================================
test_df = pd.read_csv(TEST_PATH)
submission = pd.read_csv(SUBMISSION_PATH)

print(f"test.shape: {test_df.shape}")
display(test_df.head())

print(f"submission.shape: {submission.shape}")
display(submission.head())

test.shape: (3, 2)


Unnamed: 0,essay_id,full_text
0,000d118,Many people have car where they live. The thin...
1,000fe60,I am a scientist at NASA that is discussing th...
2,001ab80,People always wish they had the same technolog...


submission.shape: (3, 2)


Unnamed: 0,essay_id,score
0,000d118,3
1,000fe60,3
2,001ab80,4


In [4]:
test_df['full_text'] = test_df['full_text'].apply(lambda x: x.replace('\n\n',' | '))

## 2- CFG

In [5]:
class CFG:
    competition = 'AES'   # Competition Name
    seed = 42
########################################################################################################
    # Data
    max_len = 512         # Max Sentence length  (This is an inital value. The current value is chosen below in the dataset)
    batch_size = 16        
    valid_batch_size = 16
    num_workers = 4       # Threads in Data Loader
    target_cols = ['score']
########################################################################################################
    # Training
    model = 'microsoft/deberta-v3-xsmall'
    epochs = 5

    freeze_n_layers = 0              # Freeze First n_layers of the Encoder
    layer_reinitialize_n = 0         # Reinitialize the last n layers of the encoder
    # Note: The fc layer is initalized in the model below. This option is for the encoder.
    
    # Note: 1dcnn still have bugs 
    pooling = 'mean'                  
        
    features_type = 'last_hidden_pooling'  # ['last_hidden_pooling','last_hidden_cls','2nd_last_hidden_cls',
                                           #  'concat_kth_last_hidden_cls','weighted_layers_cls', 'LSTM_pooling',
                                           #  'Attention_pooling']
########################################################################################################
    # Optimizer
    encoder_lr = 1.5e-5         # Pretrained Model lr  (Deberta Model)
    decoder_lr = 1.5e-5         # Custom Model lr  (The new head of the model)
    
    #LLRD is a method that applies higher learning rates for top layers and lower learning rates for bottom layers
    #The goal is to modify the lower layers that encode more general information less than the top layers that are more specific to the pre-training task.
    layerwise_learning_rate_decay = 1.0  
    eps = 1e-6                # Adam Parameters 
    betas=(0.9, 0.999)        # Adam Parameters
    weight_decay = 0.02
    
    precision = "16-mixed"
    use_8bit_optimizer = False
    if use_8bit_optimizer:
        precision = '32'

########################################################################################################
    # Scheduler
    use_scheduler = True    # Use Scheduler
    scheduler = 'cosine'      # 'cosine' or 'linear' or 'cosine_hard'
    num_cycles = 0.5
    num_warmup_steps = 0
    sch_interval = 'step'    # 'step' or 'epoch'

## 4- Utils

In [6]:
# ====================================================
# Utils
# ====================================================
def get_score(y_preds, y_trues):
    y_trues = y_trues.cpu().numpy()
    y_preds = y_preds.cpu().numpy()
    score = cohen_kappa_score(y_trues, y_preds, weights='quadratic')
    return score

class RMSELoss(nn.Module):
    def __init__(self, reduction='mean', eps=1e-9):
        super().__init__()
        self.mse = nn.MSELoss(reduction='none')
        self.reduction = reduction
        self.eps = eps

    def forward(self, y_pred, y_true):
        loss = torch.sqrt(self.mse(y_pred, y_true) + self.eps)
        if self.reduction == 'none':
            loss = loss
        elif self.reduction == 'sum':
            loss = loss.sum()
        elif self.reduction == 'mean':
            loss = loss.mean()
        return loss

seed_everything(seed=CFG.seed)

42

## 6- Tokenizer

In [7]:
# ====================================================
# tokenizer
# ====================================================
CFG.tokenizer = AutoTokenizer.from_pretrained(MODELS_PATH+'tokenizer/')

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


## 7- Dataset

In [8]:
# sort by length to speed up inference
test_df['tokenize_length'] = [len(CFG.tokenizer(text)['input_ids']) for text in test_df['full_text'].values]
test_df = test_df.sort_values('tokenize_length', ascending=True).reset_index(drop=True)
display(test_df.head())

Unnamed: 0,essay_id,full_text,tokenize_length
0,000fe60,I am a scientist at NASA that is discussing th...,398
1,000d118,Many people have car where they live. The thin...,597
2,001ab80,People always wish they had the same technolog...,633


In [9]:
# ====================================================
# Dataset
# ====================================================
# Split the sentences into tokens & Make the sentences have fixed length (padding for short - truncating for long)
def prepare_input(cfg, text):
    inputs = cfg.tokenizer.encode_plus(
        text, 
        return_tensors=None, 
        add_special_tokens=True,
        max_length=1024,
        pad_to_max_length=True,
        truncation=True
    )
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs


class TestDataset(Dataset):
    def __init__(self, cfg, df):
        self.cfg = cfg
        self.texts = df['full_text'].values

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        inputs = prepare_input(self.cfg, self.texts[item])
        return {'input_ids' : inputs['input_ids'], 
                'attention_mask' : inputs['attention_mask']}

## 8- Model

In [10]:
def freeze(module):
    """
    Freezes module's parameters.
    """
    for parameter in module.parameters():
        parameter.requires_grad = False


def get_freezed_parameters(module):
    """
    Returns names of freezed parameters of the given module.
    """
    freezed_parameters = []
    for name, parameter in module.named_parameters():
        if not parameter.requires_grad:
            freezed_parameters.append(name)
    return freezed_parameters

#### Last Hidden State Poolings

In [11]:
# We can simply take torch.mean(last_hidden_state, 1) but rather we will be implementing something different. 
# We will make use of attention masks as well so that we can ignore padding tokens which is a better way of implementing 
# average embeddings.
class MeanPooling(nn.Module):
    def __init__(self):
        super(MeanPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, 1)
        sum_mask = input_mask_expanded.sum(1)
        sum_mask = torch.clamp(sum_mask, min=1e-9)
        mean_embeddings = sum_embeddings / sum_mask
        return mean_embeddings

In [12]:
def get_optimizer_params(model, encoder_lr, decoder_lr, layerwise_learning_rate_decay, weight_decay=0.0):
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': weight_decay},
            {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': 0.0},
            {'params': [p for n, p in model.named_parameters() if "fc" in n],
             'lr': decoder_lr, 'weight_decay': 0.0}
        ]
        if CFG.layerwise_learning_rate_decay != 1.0:
            layers = [model.embeddings] + list(model.encoder.layer)
            # Reverse due to LLRD to decrease the LR graduallay from last layer to first layer. 
            layers.reverse()  # In reverse order, the learning rate will start decreasing from the last layer
            optimizer_parameters = []
            for i, layer in enumerate(layers):
                if len(layers) - CFG.freeze_n_layers - 1 <= i:
                    break
                optimizer_parameters += [
                    {"params": [p for n, p in layer.named_parameters() if not any(nd in n for nd in no_decay)],
                    "lr": encoder_lr,"weight_decay": weight_decay},

                    {"params": [p for n, p in layer.named_parameters() if any(nd in n for nd in no_decay)],
                    "lr": encoder_lr,"weight_decay": 0.0},
                ]
                # Decay LR gradually each layer by rate of layerwise_learning_rate_decay
                encoder_lr *= layerwise_learning_rate_decay
                
            optimizer_parameters += [
                    {'params': [p for n, p in model.named_parameters() if "fc" in n],
                     'lr': decoder_lr, 'weight_decay': 0.0}
                ]
                
        return optimizer_parameters
    
    
def get_scheduler(cfg, optimizer, len_train_folds):
    num_train_steps = int(len_train_folds / cfg.batch_size * cfg.epochs)
    if cfg.scheduler == 'linear':
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
        )
    elif cfg.scheduler == 'cosine':
        scheduler = get_cosine_schedule_with_warmup(
            optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
        )
    elif cfg.scheduler == 'cosine_hard':
        scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
            optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
        )
        
    return scheduler

#### The Model

In [13]:
# ====================================================
# Model
# ====================================================
class CustomModel(LightningModule):
    def __init__(self, cfg=CFG, criterion=None, train_folds=None, valid_folds=None, fold=None, pretrained=False, config_path=None):
        super().__init__()
        self.cfg = cfg
        self.criterion = criterion
        self.train_folds = train_folds
        self.valid_folds = valid_folds
        self.fold = fold
        self.pretrained = pretrained
        self.val_step_outputs = []
        self.val_step_labels = []
        
        # Configurations
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states = True)
            
            # Set Dropouts to 0 because they harm regression tasks
            self.config.hidden_dropout = 0
            self.config.hidden_dropout_prob = 0
            self.config.attention_dropout = 0
            self.config.attention_probs_dropout_prob = 0
        else:
            self.config = torch.load(config_path)
        
        # Model
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            self.model = AutoModel.from_config(self.config)
            
        torch.save(self.model.config, OUTPUT_DIR+'config.pth')
        
        # Choose the pooling method
        if self.cfg.pooling == 'mean':
            self.pool = MeanPooling()
            
        self.fc = nn.Linear(self.config.hidden_size, len(cfg.target_cols))
        
        # initalize the header
        self._init_weights(self.fc)
                
        # Reinitialize the last n layers      
        self._re_init_layers(self.cfg.layer_reinitialize_n)
        
        
        if self.cfg.freeze_n_layers >= 0:
            freeze(self.model.embeddings)
            freeze(self.model.encoder.layer[: self.cfg.freeze_n_layers])
            
    def _init_weights(self, module: nn.Module):
            if isinstance(module, nn.Linear):
                module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
                if module.bias is not None:
                    module.bias.data.zero_()
                return "nn.Linear"
            elif isinstance(module, nn.Embedding):
                module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
                if module.padding_idx is not None:
                    module.weight.data[module.padding_idx].zero_()
                return "nn.Embedding"
            elif isinstance(module, nn.LayerNorm):
                module.bias.data.zero_()
                module.weight.data.fill_(1.0)
                return "nn.LayerNorm"
            return None
    
    def _re_init_layers(self, n_layers: int):
            """Reinitialize the last n layers """
            if n_layers >= 1:
                for layer in self.model.encoder.layer[-n_layers:]:
                    # Confirmed that it works with deberta v3. Other models may be different.
                    if hasattr(layer, "modules"):
                        for module in layer.modules():
                            for name, child in module.named_children():
                                init_type_name = self._init_weights(child)
                                if init_type_name is not None:
                                    print(f"{name} is re-initialized, type: {init_type_name}, {module.__class__}")
            
    def train_dataloader(self):
        train_dataset = TrainDataset(CFG, self.train_folds)
        train_loader = DataLoader(train_dataset,
                                  batch_size=CFG.batch_size,
                                  shuffle=True,
                                  num_workers=CFG.num_workers,
                                  drop_last=True)
        return train_loader
    
    
    def val_dataloader(self):
        valid_dataset = TrainDataset(CFG, self.valid_folds)
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=CFG.valid_batch_size,
                                  shuffle=False,
                                  num_workers=CFG.num_workers,
                                  drop_last=False)
        return valid_loader
    
    def training_step(self, batch, batch_idx):
        inputs, labels = {'input_ids': batch['input_ids'], 'attention_mask': batch['attention_mask']}, batch['labels']
        inputs = collate(inputs)
        batch_size = labels.size(0)

        y_preds = self(inputs)
        loss = self.criterion(y_preds, labels)

        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        for param_group in self.trainer.optimizers[0].param_groups:
            lr = param_group["lr"]
        self.log("lr", lr, on_step=True, on_epoch=False, prog_bar=True)

        return loss

    
    def validation_step(self, batch, batch_idx):
        inputs, labels = {'input_ids': batch['input_ids'], 'attention_mask': batch['attention_mask']}, batch['labels']
        inputs = collate(inputs)
        batch_size = labels.size(0)

        y_preds = self(inputs)
        loss = self.criterion(y_preds, labels)

        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)

        self.val_step_outputs.append(y_preds)
        self.val_step_labels.append(labels)

        return loss

        
    def configure_optimizers(self):
        optimizer_parameters = get_optimizer_params(self.model,
                                                    encoder_lr=CFG.encoder_lr,
                                                    decoder_lr=CFG.decoder_lr,
                                                    layerwise_learning_rate_decay=CFG.layerwise_learning_rate_decay,
                                                    weight_decay=CFG.weight_decay)
        
        if self.cfg.use_8bit_optimizer:
            import bitsandbytes as bnb
            optimizer = bnb.optim.Adam8bit(optimizer_parameters,
                  lr=CFG.encoder_lr,
                  eps=CFG.eps,
                  betas=CFG.betas)
        else:
            optimizer = AdamW(optimizer_parameters,
                              lr=CFG.encoder_lr,
                              eps=CFG.eps,
                              betas=CFG.betas)
        
        scheduler = get_scheduler(CFG, optimizer, len(self.train_folds))
        lr_scheduler_dict = {"scheduler": scheduler, "interval": CFG.sch_interval}
        if self.cfg.use_scheduler:
            return {'optimizer': optimizer, 'lr_scheduler': lr_scheduler_dict}
        else:
            return {'optimizer': optimizer}
    
    def on_validation_epoch_end(self):
        all_preds = torch.cat(self.val_step_outputs)
        all_labels = torch.cat(self.val_step_labels)
        self.val_step_outputs.clear()
        self.val_step_labels.clear()
        
        # Clip the predictions between 0 and 5
        all_preds = torch.clamp(all_preds, min=0, max=5)    
        # Round the predictions
        all_preds = torch.round(all_preds)

        val_cks = get_score(all_preds, all_labels)
        self.log("CKS_val", val_cks, on_step=False, on_epoch=True, prog_bar=True)
        if self.trainer.global_rank == 0:
            print(f"\nEpoch: {self.current_epoch}, CKS_val: {val_cks}", flush=True)
           
    
    # Return hidden states 
    def feature(self, input_ids, attention_mask):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
#         outputs = [logits, hidden_states]
        
        if self.cfg.features_type == 'last_hidden_pooling':
            last_hidden_states = outputs.last_hidden_state
            feature = self.pool(last_hidden_states, attention_mask)
        
        return feature
    
    # The Model Architicture
    def forward(self, batch):
        feature = self.feature(batch['input_ids'], batch['attention_mask'])
#         pooler_output = feature[1]       # Pooler Output (Return the Embeddings for further usage in ML Models)
        output = self.fc(feature)
        return output

## 11- Inference

In [14]:
def inference_fn(test_loader, model, device):
    preds = []
    model.eval()
    model.to(device)
    tk0 = tqdm(test_loader, total=len(test_loader))
    for inputs in tk0:
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        with torch.no_grad():
            y_preds = model(inputs)
        preds.append(y_preds.to('cpu').numpy())
    predictions = np.concatenate(preds)
    return predictions


test_dataset = TestDataset(CFG, test_df)
test_loader = DataLoader(test_dataset,
                         batch_size=CFG.batch_size,
                         shuffle=False,
                         collate_fn=DataCollatorWithPadding(tokenizer=CFG.tokenizer, padding='longest'),
                         num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

predictions = []
checkpoint_dir = f"{MODELS_PATH}"
checkpoint_paths = glob.glob(f"{checkpoint_dir}/*.ckpt")
for checkpoint_path in tqdm(checkpoint_paths):
        model = CustomModel.load_from_checkpoint(checkpoint_path, map_location=device, config_path=MODELS_PATH+"config.pth")
        prediction = inference_fn(test_loader, model, device)
        predictions.append(prediction)
        del model, prediction; gc.collect()
        torch.cuda.empty_cache()
predictions = np.mean(predictions, axis=0)

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/opt/conda/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/lib/python3.10/multiprocessing/queues.py", line 271, in _feed
    queue_sem.release()
ValueError: semaphore or lock released too many times


  0%|          | 0/1 [00:00<?, ?it/s]

Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/opt/conda/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/lib/python3.10/multiprocessing/queues.py", line 271, in _feed
    queue_sem.release()
ValueError: semaphore or lock released too many times


  0%|          | 0/1 [00:00<?, ?it/s]

Exception ignored in: <function _ConnectionBase.__del__ at 0x7c79265b0d30>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 132, in __del__
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    self._close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "

  0%|          | 0/1 [00:00<?, ?it/s]

Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 177, in close
Exception ignored in: <function _ConnectionBase.__del__ at 0x7c79265b0d30>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 132, in __del__
    self._close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    self._close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "

  0%|          | 0/1 [00:00<?, ?it/s]

Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 177, in close
Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
    self._close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    reader_close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 177, in close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self._close()
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)


# Submission

In [15]:
predictions

array([[1.9728327],
       [0.8690351],
       [3.4157243]], dtype=float32)

In [16]:
test_df[CFG.target_cols] = np.round(np.clip(predictions + 1, 1, 6))
submission = submission.drop(columns=CFG.target_cols).merge(test_df[['essay_id'] + CFG.target_cols], on='essay_id', how='left')
display(submission.head())
submission[['essay_id'] + CFG.target_cols].to_csv('submission.csv', index=False)

Unnamed: 0,essay_id,score
0,000d118,2.0
1,000fe60,3.0
2,001ab80,4.0
