In [None]:
!nvidia-smi

Wed Aug 24 05:18:55 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P0    28W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Imports

In [None]:
%%capture
!pip install transformers
!pip install datasets
!pip install torchmetrics
!pip install sentencepiece
!pip install nlpaug
# !pip install pytorch-lightning

In [None]:
import gc
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import drive
import re
from tqdm.auto import tqdm
import glob

import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics import F1Score
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler

from datasets import load_dataset
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, confusion_matrix

import transformers
from transformers import AutoConfig, AutoModel, AutoTokenizer
from transformers import AdamW, get_cosine_schedule_with_warmup, get_linear_schedule_with_warmup

# Augmentation
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as nafc
import nltk
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

def set_seed(seed=0):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

set_seed()

import warnings
warnings.simplefilter('ignore')



[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [None]:
drive.mount('/content/drive', force_remount=False)

Mounted at /content/drive


In [None]:
%cd /content/drive/MyDrive/signate_student_cup_2022

/content/drive/MyDrive/signate_student_cup_2022


# Main part

In [None]:
def text_cleaning(batch):
    clean_texts = []
    for text in batch["description"]:
        clean_lines = []
        lines = text.split(r"</li>")
        for line in lines:
            line = remove_tag(line)
            clean_line = re.sub(r' &amp; ', ' and ', line)
            clean_line = re.sub(r'&amp;', '', clean_line)

            clean_line = re.sub(r'&lt;.*?style.*?&gt;', '', clean_line)
            clean_line = re.sub(r'&lt;', ' less than ', clean_line)
            clean_line = re.sub(r'&gt;', ' more than ', clean_line)
            clean_line = re.sub(r'\\u202f', '', clean_line)
            clean_line = re.sub(r'\\xa0', '', clean_line)
            clean_line = re.sub(r'\\', '', clean_line)

            clean_line = re.sub(r'http?://[\w/:%#\$&\?\(\)~\.=\+\-]+', '', clean_line)
            clean_line = re.sub(r'https?://[\w/:%#\$&\?\(\)~\.=\+\-]+', '', clean_line)

            clean_line = clean_line.strip()

            clean_line = clean_line + ('' if clean_line.endswith('.') else '.')
            if len(clean_line)!=1:
                clean_lines.append(clean_line)
        clean_texts.append(' '.join(clean_lines))
    return {"clean_description": clean_texts}

def remove_tag(x):
    p = re.compile(r"<[^>]*?>")
    return p.sub('',x)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
from datasets import Dataset
def get_train_data(train_path):
    train_df = pd.read_csv(train_path)
    train_df["jobflag"] -= 1
    train_ds = Dataset.from_pandas(train_df)
    train_ds = train_ds.map(text_cleaning, batched=True, batch_size=None)
    return train_ds

def get_test_data(test_path):
    test_df = pd.read_csv(test_path)
    test_ds = Dataset.from_pandas(test_df)
    test_ds = test_ds.map(text_cleaning, batched=True, batch_size=None)
    return test_ds

In [None]:
class Job_CLS_Dataset(Dataset):
    def __init__(self, config, descriptions, labels=None, max_token_len: int=128, mode=None):
        self.config = config
        self.descriptions = descriptions
        self.labels = labels

        self.tokenizer = AutoTokenizer.from_pretrained(config.model_name)
        self.max_token_len = max_token_len

        if mode=="Train":
            augs = {}
            augs["synonym_replace"] = naw.SynonymAug(aug_src='wordnet')
            augs["random_insert"] = naw.ContextualWordEmbsAug(model_path="distilbert-base-uncased", 
                                            device="cuda", action="insert", aug_max=1)            
            augs["random_substitute"] = naw.ContextualWordEmbsAug(model_path="distilbert-base-uncased", 
                                            device="cuda", action="substitute", aug_p=0.5)
            augs["random_swap"] = naw.RandomWordAug(action="swap")
            augs["random_delete"] = naw.RandomWordAug()
            self.augs = augs

        self.mode = mode

    def __len__(self):
        return len(self.descriptions)

    def __getitem__(self, index):
        description = self.descriptions[index]
        if self.mode=="Train":
            if np.random.rand() < 0.05:
                description = self.augs["synonym_replace"].augment(description)[0]
            if np.random.rand() < 0.05:
                description = self.augs["random_insert"].augment(description)[0]
            if np.random.rand() < 0.05:
                description = self.augs["random_substitute"].augment(description)[0]
            if np.random.rand() < 0.05:
                description = self.augs["random_swap"].augment(description)[0]
            if np.random.rand() < 0.05:
                description = self.augs["random_delete"].augment(description)[0]

            # Augmentation
        encoded = self.tokenizer.encode_plus(description,
                                                add_special_tokens=True,
                                                return_tensors='pt',
                                                truncation=True,
                                                padding='max_length',
                                                max_length=self.max_token_len,
                                                return_attention_mask = True)
        if self.labels is not None:
            label = torch.tensor(self.labels[index], dtype=torch.int64)
            return encoded.input_ids.flatten(), encoded.attention_mask.flatten(), label
        else:
            return encoded.input_ids.flatten(), encoded.attention_mask.flatten()

class Job_CLS_Model(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config

        cfg = AutoConfig.from_pretrained(
            config.model_name,
            output_hidden_states=True
        )
        self.pretrained_model = AutoModel.from_pretrained(config.model_name, config=cfg)

        self.hidden = torch.nn.Linear(self.pretrained_model.config.hidden_size, int(self.pretrained_model.config.hidden_size/2))
        torch.nn.init.xavier_uniform_(self.hidden.weight)

        self.classifier = torch.nn.Linear(int(self.pretrained_model.config.hidden_size/2), config.n_labels)
        torch.nn.init.xavier_uniform_(self.classifier.weight)
        
        self.loss_func = nn.CrossEntropyLoss(weight=torch.tensor([1.0, 5.0, 1.0, 1.0]))

        self.f1_func = F1Score(num_classes=config.n_labels, average="macro")
        self.logits_dropouts = nn.ModuleList([nn.Dropout(p=0.1) for _ in range(config.num_msd)])
    

    def forward(self, input_ids, attention_mask, labels=None):

        output = self.pretrained_model(input_ids=input_ids, attention_mask=attention_mask)["last_hidden_state"]
        output = output[:, 0, :]
        pooled_output = sum([self.hidden(dropout(output)) for dropout in self.logits_dropouts]) / self.config.num_msd
        pooled_output = F.relu(pooled_output)
        logits = sum([self.classifier(dropout(pooled_output)) for dropout in self.logits_dropouts]) / self.config.num_msd

        if labels is not None:
            loss = self.loss_func(logits, labels)
            with torch.no_grad():
                pass# F1の計算??
            return logits, loss
        else:
            return logits

In [None]:
def debug(ds):
    df = pd.DataFrame(ds).sort_values("jobflag").reset_index(drop=True)
    for i in range(4):
        print(f'# ===== jobflag={i} ===== #\n')
        temp_df = df[df["jobflag"]==i].reset_index(drop=True)
        for j in range(len(temp_df)):
            print("    ", temp_df.loc[j, "clean_description"], "\n")

In [None]:
def training(config, train_ds):

    folds = StratifiedKFold(n_splits=config.num_fold)
    splits = folds.split(np.zeros(train_ds.num_rows), train_ds["jobflag"])

    oof_pred = np.zeros((len(train_ds), 4), dtype=np.float32)
    fold_num = np.zeros(len(train_ds), dtype=np.int32)

    for fold, (train_idxs, val_idxs) in enumerate(splits):

        print(f'\n# ==== start fold{fold+1} ==== #\n')
        train_fold_ds = train_ds.select(train_idxs)
        valid_fold_ds = train_ds.select(val_idxs)

        # debug(valid_fold_ds)# ========================

        train_dataset = Job_CLS_Dataset(config, train_fold_ds["clean_description"], train_fold_ds["jobflag"])# mode="Train"
        valid_dataset = Job_CLS_Dataset(config, valid_fold_ds["clean_description"], valid_fold_ds["jobflag"])

        train_loader = DataLoader(
            dataset=train_dataset, 
            batch_size=config.batch_size, 
            shuffle=True,
            drop_last=False
        )
        valid_loader = DataLoader(
            dataset=valid_dataset,
            batch_size=config.batch_size,
            shuffle=False,
            drop_last=False
        )

        best_val_preds = None
        best_val_f1_score = -1

        model = Job_CLS_Model(config)
        model.to(config.device)

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = []
        optimizer_grouped_parameters.append({
            'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 
            'weight_decay': config.weight_decay
        })
        optimizer_grouped_parameters.append({
            'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 
            'weight_decay': 0.0
        })
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=config.lr,
            betas=config.beta,
            weight_decay=config.weight_decay,
        )

        num_train_optimization_steps = int(
            len(train_loader) * config.n_epochs // config.gradient_accumulation_steps
        )
        num_warmup_steps = int(num_train_optimization_steps * config.num_warmup_steps_rate)
        scheduler = get_cosine_schedule_with_warmup(
            optimizer,
            num_warmup_steps=num_warmup_steps,
            num_training_steps=num_train_optimization_steps
        )
        num_eval_step = len(train_loader) // config.num_eval + config.num_eval

        for epoch in range(config.n_epochs):
            # training
            print(f"# ============ start epoch:{epoch+1} ============== #")
            model.train()
            train_preds = []
            train_labels = []
            val_losses_batch = []
            scaler = GradScaler()
            with tqdm(train_loader, total=len(train_loader)) as pbar:
                for step, (input_ids, attention_mask, labels) in enumerate(pbar):

                    input_ids = input_ids.to(config.device)
                    attention_mask = attention_mask.to(config.device)
                    labels = labels.to(config.device)

                    optimizer.zero_grad()
                    with autocast():
                        output, loss = model(input_ids, attention_mask, labels)
                    pbar.set_postfix({
                        'loss': loss.item(),
                        'lr': scheduler.get_lr()[0]
                    })

                    if config.gradient_accumulation_steps>1:
                        loss = loss/config.gradient_accumlation_steps
                    scaler.scale(loss).backward()

                    if (step+1)%config.gradient_accumulation_steps == 0:
                        scaler.step(optimizer)
                        scaler.update()
                        scheduler.step()
                    
                    output = output.softmax(axis=1).detach().cpu().numpy()
                    train_preds.append(output)
                    train_labels.append(labels.detach().cpu().numpy())

            train_preds = np.concatenate(train_preds)
            train_labels = np.concatenate(train_labels)
            train_f1_score = f1_score(np.argmax(train_preds, axis=1), train_labels, average='macro')
            print(f'Training score: {train_f1_score}')
            # each F1
            each_F1 = []
            for l in range(4):
                temp_preds = np.where(np.argmax(train_preds, axis=1)==l, 1, 0)
                temp_labels = np.where(train_labels==l, 1, 0)
                con_matrix = confusion_matrix(temp_labels, temp_preds)
                TN = con_matrix[0][0] # 真陰性
                FP = con_matrix[0][1] # 偽陽性
                FN = con_matrix[1][0] # 偽陰性
                TP = con_matrix[1][1] # 真陽性
                precision = TP/(TP+FP)
                recall = TP/(TP+FN)
                temp_f1 = (2*precision*recall) / (precision+recall)
                each_F1.append(temp_f1)
            print(f'    Each F1: {each_F1}')

            # evaluating
            val_preds = []
            val_labels = []
            val_losses = []
            val_nums = []
            model.eval()
            with torch.no_grad():
                with tqdm(valid_loader, total=len(valid_loader)) as pbar:
                    for (input_ids, attention_mask, labels) in pbar:

                        input_ids = input_ids.to(config.device)
                        attention_mask = attention_mask.to(config.device)
                        labels = labels.to(config.device)
                        with autocast():
                            output, loss = model(input_ids, attention_mask, labels)
                        output = output.softmax(axis=1).detach().cpu().numpy()
                        val_preds.append(output)
                        val_labels.append(labels.detach().cpu().numpy())
                        val_losses.append(loss.item() * len(labels))
                        val_nums.append(len(labels))
                        pbar.set_postfix({
                            'val_loss': loss.item()
                        })

            val_preds = np.concatenate(val_preds)
            val_labels = np.concatenate(val_labels)
            val_loss = sum(val_losses) / sum(val_nums)
            val_f1_score = f1_score(np.argmax(val_preds, axis=1), val_labels, average='macro')

            val_log = {
                'val_loss': val_loss,
                'val_f1_score': val_f1_score,
            }
            display(val_log)

            # each F1
            each_F1 = []
            for l in range(4):
                temp_preds = np.where(np.argmax(val_preds, axis=1)==l, 1, 0)
                temp_labels = np.where(val_labels==l, 1, 0)
                con_matrix = confusion_matrix(temp_labels, temp_preds)
                TN = con_matrix[0][0] # 真陰性
                FP = con_matrix[0][1] # 偽陽性
                FN = con_matrix[1][0] # 偽陰性
                TP = con_matrix[1][1] # 真陽性
                precision = TP/(TP+FP)
                recall = TP/(TP+FN)
                temp_f1 = (2*precision*recall) / (precision+recall)
                each_F1.append(temp_f1)
            print(f'    Each F1: {each_F1}')

            if best_val_f1_score < val_f1_score:
                print("\n [ save model weight ] \n")
                best_val_preds = val_preds
                best_val_f1_score = val_f1_score
                torch.save(
                    model.state_dict(), 
                    os.path.join(config.model_save_path, f"fold{fold}.pth")
                )
            
        oof_pred[val_idxs] = best_val_preds.astype(np.float32)
        fold_num[val_idxs] = fold+1
        del model; gc.collect()

    # scoring
    score = f1_score(np.argmax(oof_pred, axis=1), train_ds['jobflag'], average='macro')
    print('CV:', round(score, 5))
    return score, oof_pred, fold_num

In [None]:
def inferring(config, test_ds):
    config.model_weights = [p for p in sorted(glob.glob(os.path.join(config.model_save_path, 'fold*.pth')))]
    sub_pred = np.zeros((len(test_ds), 4), dtype=np.float32)
    for fold, model_weight in enumerate(config.model_weights):
        test_dataset = Job_CLS_Dataset(config,  test_ds["clean_description"])
        test_loader = DataLoader(
            dataset=test_dataset, 
            batch_size=config.batch_size, 
            shuffle=False,
            drop_last=False
        )
        model = Job_CLS_Model(config)
        model.load_state_dict(torch.load(model_weight))
        model.to(config.device)

        model.eval()
        fold_pred = []
        with torch.no_grad():
            with tqdm(test_loader, total=len(test_loader)) as pbar:
                for (input_ids, attention_mask) in pbar:

                    input_ids = input_ids.to(config.device)
                    attention_mask = attention_mask.to(config.device)
                    with autocast():
                        output = model(input_ids, attention_mask)
                    output = output.softmax(axis=1).detach().cpu().numpy()
                    fold_pred.append(output)
        fold_pred = np.concatenate(fold_pred)
        # np.save(os.path.join(cfg.EXP_PREDS, f'sub_pred_fold{fold}.npy'), fold_pred)
        sub_pred += fold_pred / len(config.model_weights)
        del model; gc.collect()

    np.save(os.path.join(config.preds_save_path, 'sub_pred.npy'), sub_pred)
    return sub_pred

In [None]:
class Config:
    # private
    # _colab_path = "/content/drive/MyDrive/signate_student_cup_2022" 
    _exp_num = '002'

    # 学習param
    seed = 0
    model_name = "microsoft/deberta-base"# roberta-base, microsoft/deberta-base, microsoft/deberta-v3-base
    num_fold = 8
    batch_size = 32
    n_epochs = 20
    max_token_len = 128
    lr = 2e-5

    weight_decay = 2e-5
    beta = (0.9, 0.98)
    num_warmup_steps_rate = 0.001
    gradient_accumulation_steps = 1
    num_eval = 1

    n_labels = 4
    num_msd = 8
    

    # 保存先
    save_folder_name = f'Exp{_exp_num}_{model_name}_fold{num_fold}_epoch{n_epochs}_tokenlen{max_token_len}_lr{lr}'
    
def setup(config):
    print("### Configration Setup...")
    config.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # mount
    from google.colab import drive
    if not os.path.isdir('/content/drive'):
        drive.mount('/content/drive') 

    config.output_path = './outputs'
    config.experiment_path = os.path.join(config.output_path, config.save_folder_name)
    print(f'    experiment_path  >> {config.experiment_path}')
    config.model_save_path = os.path.join(config.experiment_path, 'model')
    print(f'    model_save_path >> {config.model_save_path}')
    config.figure_save_path = os.path.join(config.experiment_path, 'figure')
    print(f'    figure_save_path >> {config.figure_save_path}')
    config.preds_save_path = os.path.join(config.experiment_path, 'preds')
    print(f'    preds_save_path >> {config.preds_save_path}')
    
    for d in [config.output_path, config.experiment_path, config.model_save_path, config.figure_save_path, config.preds_save_path]:
        os.makedirs(d, exist_ok=True)

    print("### Setup Complete. \n")
    return config

In [None]:
def main():

    # config
    config = setup(Config)
    train_path = './data/train.csv'
    test_path = './data/test.csv'
    submit_path = './data/submit_sample.csv'
    job_flags = ['Data scientist', 'Machine learning engineer','Software engineer','Consultant']

    train_ds = get_train_data(train_path)
    test_ds = get_test_data(test_path)

    score, oof_pred, fold_num = training(config, train_ds)

    sub_pred = inferring(config, test_ds)
    sub = pd.read_csv(submit_path, header=None)
    sub[1] = np.argmax(sub_pred, axis=1)
    sub[1] = sub[1].astype(int) + 1

    def fix_leak(sub, train_path, test_path):
        print("===== fix_leak =====")
        train_df = pd.read_csv(train_path)
        test_df = pd.read_csv(test_path)
        duplicated = pd.merge(test_df, train_df[["description", "jobflag"]], on="description", how="left")
        duplicated = duplicated[~duplicated["jobflag"].isnull()]
        for i in duplicated.index:
            print(f'Fix index{i}: {sub.loc[i,1]}')
            sub.loc[i, 1] = int(duplicated.loc[i, "jobflag"])
            print(f'To {sub.loc[i,1]}')
        return sub
    sub = fix_leak(sub, train_path, test_path)

    # 提出用ファイル
    sub.to_csv(os.path.join(config.preds_save_path, f'Exp{config._exp_num}_CV{int(score*(10**10))}_submission.csv'), index=False, header=False)

    return oof_pred, fold_num# debug

In [None]:
oof_pred, fold_num = main()

### Configration Setup...
    experiment_path  >> ./outputs/Exp002_microsoft/deberta-base_fold8_epoch20_tokenlen128_lr2e-05
    model_save_path >> ./outputs/Exp002_microsoft/deberta-base_fold8_epoch20_tokenlen128_lr2e-05/model
    figure_save_path >> ./outputs/Exp002_microsoft/deberta-base_fold8_epoch20_tokenlen128_lr2e-05/figure
    preds_save_path >> ./outputs/Exp002_microsoft/deberta-base_fold8_epoch20_tokenlen128_lr2e-05/preds
### Setup Complete. 



  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]


# ==== start fold1 ==== #



Downloading tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/474 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/533M [00:00<?, ?B/s]

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.3054728603908146
    Each F1: [0.20811287477954146, 0.1553030303030303, 0.34559999999999996, 0.5128755364806867]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.0118890210201865, 'val_f1_score': 0.5553734481224007}

    Each F1: [0.6068965517241379, 0.380952380952381, 0.6728971962616822, 0.5607476635514018]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.5740841032717294
    Each F1: [0.5737265415549598, 0.28112449799196787, 0.7230169050715214, 0.7184684684684686]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8850021839141846, 'val_f1_score': 0.6205165130568356}

    Each F1: [0.6451612903225806, 0.33333333333333337, 0.8035714285714286, 0.7]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.6715332807437875
    Each F1: [0.6467391304347826, 0.41860465116279066, 0.8366013071895425, 0.7841880341880342]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.788004629862936, 'val_f1_score': 0.6794449838187703}

    Each F1: [0.6833333333333333, 0.5, 0.816, 0.7184466019417477]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.7854819817778798
    Each F1: [0.7487309644670052, 0.6951871657754012, 0.8871989860583017, 0.8108108108108109]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8287580427370573, 'val_f1_score': 0.6354948954788526}

    Each F1: [0.62, 0.411764705882353, 0.816, 0.6942148760330579]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.8500696927449716
    Each F1: [0.8072445019404915, 0.8089887640449437, 0.925, 0.8590455049944505]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8446811500348543, 'val_f1_score': 0.6986105819973645}

    Each F1: [0.7076923076923076, 0.5, 0.8256880733944955, 0.7610619469026547]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9056312556503929
    Each F1: [0.871859296482412, 0.8862275449101796, 0.9557522123893806, 0.9086859688195992]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.9518904761264199, 'val_f1_score': 0.6773902156677276}

    Each F1: [0.6262626262626263, 0.5185185185185185, 0.8429752066115702, 0.7218045112781954]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9256478688252547
    Each F1: [0.9088639200998752, 0.8902439024390243, 0.97, 0.9334836527621195]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.9199449062347412, 'val_f1_score': 0.718834240246921}

    Each F1: [0.7207207207207208, 0.5517241379310345, 0.8448275862068966, 0.7580645161290323]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9478260734233701
    Each F1: [0.9333333333333333, 0.927710843373494, 0.9848866498740554, 0.9453734671125976]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.0119177021478352, 'val_f1_score': 0.7101570895750767}

    Each F1: [0.6981132075471698, 0.5384615384615385, 0.8521739130434783, 0.7518796992481204]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9765438330529385
    Each F1: [0.9639751552795032, 0.980891719745223, 0.9937106918238994, 0.9675977653631286]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1075517679515638, 'val_f1_score': 0.7414518404514557}

    Each F1: [0.75, 0.5833333333333334, 0.8672566371681416, 0.765217391304348]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9805760336273818
    Each F1: [0.9743589743589743, 0.9806451612903225, 0.9899244332493703, 0.9773755656108597]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.0753893864782234, 'val_f1_score': 0.760985353643293}

    Each F1: [0.7704918032786885, 0.6153846153846153, 0.8849557522123894, 0.773109243697479]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.990838141676986
    Each F1: [0.9877149877149877, 0.9871794871794872, 0.9974874371859297, 0.9909706546275394]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1964858004921362, 'val_f1_score': 0.7365709476085665}

    Each F1: [0.7457627118644068, 0.5714285714285714, 0.8648648648648649, 0.7642276422764228]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9958441762062171
    Each F1: [0.9926829268292683, 1.0, 0.9974811083123425, 0.9932126696832579]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.3725796925394158, 'val_f1_score': 0.7272126077561262}

    Each F1: [0.7413793103448276, 0.5833333333333334, 0.8403361344537815, 0.7438016528925621]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9976060638044584
    Each F1: [0.996328029375765, 1.0, 0.9974937343358395, 0.9966024915062287]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.3555360718777305, 'val_f1_score': 0.7234198743183389}

    Each F1: [0.7457627118644068, 0.56, 0.8275862068965518, 0.7603305785123967]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 1.0
    Each F1: [1.0, 1.0, 1.0, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.4032272401608918, 'val_f1_score': 0.7233317212361561}

    Each F1: [0.7304347826086958, 0.56, 0.8448275862068966, 0.7580645161290323]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9988231421965063
    Each F1: [0.9975550122249389, 1.0, 1.0, 0.997737556561086]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.4496959673730951, 'val_f1_score': 0.7399407004008773}

    Each F1: [0.7610619469026549, 0.56, 0.8547008547008548, 0.7839999999999999]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9994115165722741
    Each F1: [0.9987760097919216, 1.0, 1.0, 0.9988700564971752]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.4719816471401015, 'val_f1_score': 0.72493433111913}

    Each F1: [0.7540983606557377, 0.5384615384615385, 0.8468468468468469, 0.7603305785123967]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 1.0
    Each F1: [1.0, 1.0, 1.0, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.4845113202145226, 'val_f1_score': 0.7282858786310038}

    Each F1: [0.7377049180327869, 0.5833333333333334, 0.8421052631578947, 0.7500000000000001]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9980607627666451
    Each F1: [0.9987789987789988, 0.9934640522875817, 1.0, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.4747384786605835, 'val_f1_score': 0.7569913097334857}

    Each F1: [0.7603305785123968, 0.64, 0.8571428571428571, 0.7704918032786885]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9980726313653885
    Each F1: [1.0, 0.9935483870967742, 0.9987421383647799, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.4785842167703729, 'val_f1_score': 0.7610743321718931}

    Each F1: [0.7666666666666666, 0.64, 0.8571428571428571, 0.7804878048780488]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 1.0
    Each F1: [1.0, 1.0, 1.0, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.480444003406324, 'val_f1_score': 0.7363487141856828}

    Each F1: [0.7457627118644068, 0.5833333333333334, 0.8421052631578947, 0.7741935483870968]

# ==== start fold2 ==== #



Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.23252779580731814
    Each F1: [0.16666666666666669, 0.12244897959183673, 0.28270676691729324, 0.3582887700534759]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.2549078602539865, 'val_f1_score': 0.4203529095058547}

    Each F1: [0.35555555555555557, 0.2325581395348837, 0.43636363636363634, 0.6569343065693432]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.4736794833092951
    Each F1: [0.4595744680851064, 0.24242424242424246, 0.5106382978723404, 0.6820809248554914]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1116529590205142, 'val_f1_score': 0.5369907169992988}

    Each F1: [0.5106382978723405, 0.3157894736842105, 0.7234042553191489, 0.5981308411214953]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.6260994003900915
    Each F1: [0.5911047345767576, 0.350597609561753, 0.7955112219451371, 0.7671840354767183]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.079231137350986, 'val_f1_score': 0.6472144728136244}

    Each F1: [0.6608695652173914, 0.4799999999999999, 0.7642276422764227, 0.6837606837606837]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.6490957735619629
    Each F1: [0.636734693877551, 0.40837696335078527, 0.7617977528089888, 0.7894736842105263]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8977933397418574, 'val_f1_score': 0.6359216646343886}

    Each F1: [0.5918367346938775, 0.411764705882353, 0.8141592920353982, 0.7259259259259259]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.7884746806368303
    Each F1: [0.7430830039525691, 0.6666666666666666, 0.9021601016518426, 0.841988950276243]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1172002870785562, 'val_f1_score': 0.6408304518652475}

    Each F1: [0.6446280991735538, 0.45454545454545453, 0.8032786885245902, 0.6608695652173913]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.8284070010391594
    Each F1: [0.811704834605598, 0.6947368421052631, 0.9260204081632653, 0.8811659192825112]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.044104483566786, 'val_f1_score': 0.6060570325900515}

    Each F1: [0.6037735849056605, 0.37499999999999994, 0.7636363636363637, 0.6818181818181819]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.8727061398680906
    Each F1: [0.8669201520912546, 0.7608695652173912, 0.9518987341772152, 0.9111361079865017]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1892648875713348, 'val_f1_score': 0.6364086139690204}

    Each F1: [0.6299212598425196, 0.4999999999999999, 0.8070175438596491, 0.6086956521739131]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9239867642371988
    Each F1: [0.9058971141781681, 0.8994082840236687, 0.9634300126103404, 0.9272116461366181]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.2689391594184072, 'val_f1_score': 0.6267609480383776}

    Each F1: [0.6, 0.41379310344827586, 0.8099173553719009, 0.6833333333333335]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9304728791109387
    Each F1: [0.9203980099502487, 0.8928571428571428, 0.97, 0.9386363636363637]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.3232480896146674, 'val_f1_score': 0.6622644892041858}

    Each F1: [0.6829268292682927, 0.5454545454545454, 0.7592592592592592, 0.6614173228346457]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9664171370667359
    Each F1: [0.9627791563275433, 0.9440993788819876, 0.9824120603015075, 0.9763779527559054]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.341807267226671, 'val_f1_score': 0.6811946097951717}

    Each F1: [0.6776859504132231, 0.5454545454545454, 0.8067226890756302, 0.6949152542372882]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9827835531697451
    Each F1: [0.9852216748768472, 0.9625, 0.992462311557789, 0.9909502262443439]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.5664326024682897, 'val_f1_score': 0.6749250174616871}

    Each F1: [0.6666666666666667, 0.5714285714285713, 0.7894736842105263, 0.6721311475409837]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9859611290478165
    Each F1: [0.9828009828009827, 0.980891719745223, 0.9937264742785445, 0.9864253393665159]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.7100309218231, 'val_f1_score': 0.7023641933310654}

    Each F1: [0.6949152542372882, 0.5714285714285713, 0.8264462809917354, 0.7166666666666668]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9896304479306655
    Each F1: [0.9901477832512315, 0.9871794871794872, 0.9924812030075189, 0.9887133182844244]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.6998864870322379, 'val_f1_score': 0.6848104873183243}

    Each F1: [0.6776859504132231, 0.5454545454545454, 0.8264462809917354, 0.6896551724137931]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9925391994911371
    Each F1: [0.9914320685434517, 0.987012987012987, 0.9962264150943396, 0.9954853273137697]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.721176916360855, 'val_f1_score': 0.7014352164272631}

    Each F1: [0.6956521739130435, 0.5454545454545454, 0.8166666666666668, 0.7479674796747968]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9912760140308426
    Each F1: [0.993894993894994, 0.9806451612903225, 0.9962264150943396, 0.9943374858437145]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.789495418259972, 'val_f1_score': 0.6717825122915261}

    Each F1: [0.6666666666666666, 0.5454545454545454, 0.7967479674796748, 0.6782608695652174]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9982027201981399
    Each F1: [0.996328029375765, 1.0, 0.9987452948557088, 0.997737556561086]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.8538705273678429, 'val_f1_score': 0.6951239588208715}

    Each F1: [0.7049180327868851, 0.5555555555555556, 0.8103448275862069, 0.7096774193548387]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9982345497168226
    Each F1: [0.996328029375765, 1.0, 1.0, 0.9966101694915254]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.8504549421762164, 'val_f1_score': 0.6806782559133656}

    Each F1: [0.6776859504132231, 0.5454545454545454, 0.8099173553719009, 0.6896551724137931]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9981952932161584
    Each F1: [0.9975550122249389, 1.0, 0.9974937343358395, 0.9977324263038548]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.8357140810866104, 'val_f1_score': 0.69098669529704}

    Each F1: [0.6837606837606838, 0.5714285714285713, 0.8103448275862069, 0.6984126984126984]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9982017442293754
    Each F1: [0.996319018404908, 1.0, 0.9987452948557088, 0.9977426636568849]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.8381756126880646, 'val_f1_score': 0.6988437497811016}

    Each F1: [0.6890756302521008, 0.5714285714285713, 0.8135593220338982, 0.7213114754098361]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9993802842859447
    Each F1: [0.9987789987789988, 1.0, 0.9987421383647799, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.8463860838036787, 'val_f1_score': 0.6988437497811016}

    Each F1: [0.6890756302521008, 0.5714285714285713, 0.8135593220338982, 0.7213114754098361]

# ==== start fold3 ==== #



Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.34407548306169655
    Each F1: [0.33810888252148996, 0.16867469879518074, 0.3770739064856712, 0.49244444444444435]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.0048178923757454, 'val_f1_score': 0.5357961929516092}

    Each F1: [0.46601941747572817, 0.28, 0.72, 0.6771653543307087]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.554675293339255
    Each F1: [0.47887323943661975, 0.35023041474654376, 0.7493472584856397, 0.6402502606882169]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8216480901366786, 'val_f1_score': 0.5836135014463046}

    Each F1: [0.33766233766233766, 0.45161290322580644, 0.7846153846153846, 0.7605633802816901]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.597119890805279
    Each F1: [0.543884892086331, 0.3627450980392157, 0.7747989276139411, 0.7070506454816285]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.9912441228565417, 'val_f1_score': 0.5647608616283315}

    Each F1: [0.4578313253012048, 0.26666666666666666, 0.7878787878787878, 0.7466666666666666]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.7298772045397238
    Each F1: [0.6997389033942558, 0.5263157894736842, 0.8872180451127819, 0.8062360801781736]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8115705499523564, 'val_f1_score': 0.681890746545919}

    Each F1: [0.6551724137931034, 0.4999999999999999, 0.8148148148148149, 0.7575757575757576]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.8038631928467977
    Each F1: [0.762402088772846, 0.6914893617021277, 0.9203980099502488, 0.8411633109619686]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.991885316999335, 'val_f1_score': 0.652190566886012}

    Each F1: [0.6206896551724138, 0.4705882352941177, 0.8073394495412844, 0.7101449275362319]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.8679860269555117
    Each F1: [0.8607277289836889, 0.7790697674418604, 0.9345088161209069, 0.8976377952755905]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.9739621118495339, 'val_f1_score': 0.6658611442601443}

    Each F1: [0.6055045871559633, 0.5454545454545454, 0.7796610169491526, 0.7328244274809159]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9190715441211439
    Each F1: [0.9021065675340767, 0.880952380952381, 0.9635220125786164, 0.929705215419501]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.0826384798476572, 'val_f1_score': 0.6326562250139198}

    Each F1: [0.6194690265486725, 0.4210526315789474, 0.7747747747747747, 0.7153284671532847]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9508614609803204
    Each F1: [0.9419035846724351, 0.9259259259259258, 0.9773299748110832, 0.9582863585118377]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.205721590393468, 'val_f1_score': 0.6894547843617251}

    Each F1: [0.6511627906976744, 0.5882352941176471, 0.8166666666666668, 0.7017543859649124]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.974871354994962
    Each F1: [0.9594095940959411, 0.9935483870967742, 0.9836477987421384, 0.9628796400449944]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.2555006551115135, 'val_f1_score': 0.6773516783762685}

    Each F1: [0.6666666666666667, 0.5, 0.8214285714285714, 0.7213114754098361]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9730477407765523
    Each F1: [0.9654320987654321, 0.9559748427672956, 0.9899497487437185, 0.9808342728297631]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.4880036059178805, 'val_f1_score': 0.6778833272439411}

    Each F1: [0.6666666666666667, 0.4705882352941177, 0.8347826086956522, 0.7394957983193275]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9840826333099855
    Each F1: [0.9779411764705882, 0.980891719745223, 0.9898989898989898, 0.987598647125141]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.5642059087753295, 'val_f1_score': 0.6807830476823498}

    Each F1: [0.6825396825396826, 0.4705882352941177, 0.8305084745762712, 0.7394957983193275]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9868688325398355
    Each F1: [0.9877450980392156, 0.9681528662420381, 0.9949748743718593, 0.9966024915062287]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.934720998688748, 'val_f1_score': 0.6398298640611724}

    Each F1: [0.672, 0.37499999999999994, 0.7850467289719626, 0.7272727272727272]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9869511140640868
    Each F1: [0.9878048780487805, 0.9743589743589742, 0.9924433249370276, 0.9931972789115646]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.6939259293832276, 'val_f1_score': 0.6712573072707344}

    Each F1: [0.6608695652173914, 0.4705882352941177, 0.8035714285714286, 0.7500000000000001]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9976462843930124
    Each F1: [0.9951100244498777, 1.0, 1.0, 0.995475113122172]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.8619738817214966, 'val_f1_score': 0.6821118891379023}

    Each F1: [0.6504065040650406, 0.5000000000000001, 0.8403361344537815, 0.7377049180327869]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9988231421965063
    Each F1: [0.9975550122249389, 1.0, 1.0, 0.997737556561086]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.8265036131206311, 'val_f1_score': 0.6766285194004762}

    Each F1: [0.6440677966101694, 0.5000000000000001, 0.8264462809917354, 0.736]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9961419998094815
    Each F1: [0.9975550122249389, 0.987012987012987, 1.0, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.9236147366072003, 'val_f1_score': 0.6765020110608346}

    Each F1: [0.6324786324786326, 0.5000000000000001, 0.823529411764706, 0.75]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9988233504784028
    Each F1: [0.9975609756097561, 1.0, 1.0, 0.9977324263038548]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.9192116662075644, 'val_f1_score': 0.6727203910155057}

    Each F1: [0.6363636363636364, 0.4705882352941177, 0.8135593220338982, 0.7703703703703704]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9949724303142822
    Each F1: [0.9950980392156863, 0.9871794871794872, 0.9987421383647799, 0.9988700564971752]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.9581406910168497, 'val_f1_score': 0.6733858206744235}

    Each F1: [0.6386554621848739, 0.5000000000000001, 0.8205128205128205, 0.7343749999999999]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9994116239869354
    Each F1: [0.9987789987789988, 1.0, 1.0, 0.9988674971687429]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.9632033686888846, 'val_f1_score': 0.6687632415254238}

    Each F1: [0.6271186440677966, 0.5000000000000001, 0.8135593220338982, 0.7343749999999999]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.997456320948041
    Each F1: [1.0, 0.9934640522875817, 0.9974937343358395, 0.9988674971687429]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.9629584808098643, 'val_f1_score': 0.6687632415254238}

    Each F1: [0.6271186440677966, 0.5000000000000001, 0.8135593220338982, 0.7343749999999999]

# ==== start fold4 ==== #



Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.3426362663342375
    Each F1: [0.45454545454545453, 0.07179487179487179, 0.37481698389458273, 0.46938775510204084]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1345010544124403, 'val_f1_score': 0.45826811157936986}

    Each F1: [0.49090909090909096, nan, 0.6666666666666667, 0.6754966887417219]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.5246597084271134
    Each F1: [0.520051746442432, 0.19574468085106383, 0.6827309236947792, 0.7001114827201784]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8316542951684249, 'val_f1_score': 0.5826234093146523}

    Each F1: [0.4705882352941177, 0.3902439024390244, 0.7724137931034484, 0.6972477064220184]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.6679135258554563
    Each F1: [0.6395806028833552, 0.4587155963302752, 0.8203124999999999, 0.7530454042081949]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.6033805295040733, 'val_f1_score': 0.6817615935262994}

    Each F1: [0.7058823529411764, 0.42857142857142855, 0.8703703703703703, 0.7222222222222222]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.7721721841151616
    Each F1: [0.7471116816431324, 0.6331658291457287, 0.8900255754475703, 0.8183856502242152]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.564556109905243, 'val_f1_score': 0.7185823754789271}

    Each F1: [0.7384615384615385, 0.4615384615384615, 0.8965517241379309, 0.7777777777777778]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.8053852822649815
    Each F1: [0.7688311688311689, 0.7040816326530612, 0.9197994987468672, 0.8288288288288288]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.5789208085913408, 'val_f1_score': 0.7083583320833957}

    Each F1: [0.7079646017699114, 0.5, 0.864406779661017, 0.7610619469026547]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.8899700390510485
    Each F1: [0.8743718592964824, 0.8505747126436781, 0.9480354879594423, 0.8868980963045913]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.6365990099154021, 'val_f1_score': 0.745164091507965}

    Each F1: [0.7333333333333334, 0.4999999999999999, 0.9043478260869565, 0.8429752066115702]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9351060830088901
    Each F1: [0.9120198265179678, 0.9308176100628931, 0.9701492537313434, 0.9274376417233561]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.6356639291110792, 'val_f1_score': 0.7545238852057033}

    Each F1: [0.743801652892562, 0.5384615384615385, 0.8928571428571428, 0.8429752066115702]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9585640924870378
    Each F1: [0.9482758620689655, 0.9487179487179488, 0.9811320754716981, 0.9561304836895388]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.6970647284859105, 'val_f1_score': 0.7557399855571132}

    Each F1: [0.736842105263158, 0.5714285714285714, 0.8813559322033898, 0.8333333333333334]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9716160258622971
    Each F1: [0.9656862745098039, 0.9625, 0.9911949685534591, 0.9670828603859251]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.809788253432826, 'val_f1_score': 0.780415694705976}

    Each F1: [0.782608695652174, 0.6153846153846153, 0.8833333333333334, 0.8403361344537815]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9886357289324932
    Each F1: [0.984009840098401, 0.9935483870967742, 0.9949622166246851, 0.9820224719101124]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8253016396572715, 'val_f1_score': 0.7605634148737597}

    Each F1: [0.7333333333333334, 0.5925925925925926, 0.905982905982906, 0.8103448275862069]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9879834340908413
    Each F1: [0.9816849816849816, 0.9935483870967742, 0.9937264742785445, 0.9829738933030647]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.9466681467859368, 'val_f1_score': 0.7663928290114249}

    Each F1: [0.7207207207207208, 0.6153846153846153, 0.8907563025210085, 0.8387096774193549]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9981952097748852
    Each F1: [0.9975609756097561, 1.0, 0.9974874371859297, 0.9977324263038548]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.0111068813424362, 'val_f1_score': 0.7450806620682192}

    Each F1: [0.7102803738317757, 0.5714285714285714, 0.8760330578512396, 0.8225806451612904]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9964595033746437
    Each F1: [0.9938800489596084, 1.0, 0.9987452948557088, 0.9932126696832579]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1041169718692176, 'val_f1_score': 0.7271983926735819}

    Each F1: [0.706896551724138, 0.4999999999999999, 0.8888888888888888, 0.8130081300813008]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9970581199346771
    Each F1: [0.993894993894994, 1.0, 1.0, 0.9943374858437145]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.076864703078019, 'val_f1_score': 0.7579308702620123}

    Each F1: [0.7155963302752293, 0.5925925925925926, 0.8888888888888888, 0.8346456692913385]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 1.0
    Each F1: [1.0, 1.0, 1.0, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1648005033794202, 'val_f1_score': 0.7386252972116351}

    Each F1: [0.7027027027027027, 0.5384615384615385, 0.8907563025210085, 0.8225806451612904]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9968955521815706
    Each F1: [0.9975490196078431, 0.9935483870967742, 0.9987421383647799, 0.9977426636568849]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.166582761312786, 'val_f1_score': 0.7545155449567215}

    Each F1: [0.7027027027027027, 0.5833333333333334, 0.8907563025210085, 0.8412698412698413]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9994115165722741
    Each F1: [0.9987760097919216, 1.0, 1.0, 0.9988700564971752]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1520933289276927, 'val_f1_score': 0.7473718359820054}

    Each F1: [0.7207207207207208, 0.5384615384615385, 0.8983050847457625, 0.832]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9993803261619076
    Each F1: [0.9987760097919216, 1.0, 0.9987452948557088, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1510480165481567, 'val_f1_score': 0.747808523713129}

    Each F1: [0.7142857142857143, 0.5384615384615385, 0.8947368421052632, 0.84375]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9993802842859447
    Each F1: [0.9987789987789988, 1.0, 0.9987421383647799, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.145359932748895, 'val_f1_score': 0.7473718359820054}

    Each F1: [0.7207207207207208, 0.5384615384615385, 0.8983050847457625, 0.832]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 1.0
    Each F1: [1.0, 1.0, 1.0, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1476904956918015, 'val_f1_score': 0.7473718359820054}

    Each F1: [0.7207207207207208, 0.5384615384615385, 0.8983050847457625, 0.832]

# ==== start fold5 ==== #



Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.376598157025955
    Each F1: [0.3577712609970674, 0.15037593984962405, 0.46505717916137235, 0.5331882480957563]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.9368459784164631, 'val_f1_score': 0.5395713950520427}

    Each F1: [0.6422018348623854, nan, 0.7874015748031495, 0.7286821705426357]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.6064735137516541
    Each F1: [0.6178428761651131, 0.29310344827586204, 0.7696335078534032, 0.745314222712238]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.7672559502263548, 'val_f1_score': 0.6698547294834243}

    Each F1: [0.6464646464646465, 0.4827586206896552, 0.7938931297709922, 0.7563025210084034]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.7270404442136591
    Each F1: [0.7044854881266491, 0.5816326530612246, 0.8389513108614232, 0.7830923248053392]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.7195225638687295, 'val_f1_score': 0.6743111839012104}

    Each F1: [0.6597938144329897, 0.4285714285714286, 0.8181818181818182, 0.7906976744186046]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.7950005986076565
    Each F1: [0.7735368956743003, 0.6770833333333334, 0.8956743002544529, 0.8337078651685393]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.7275795545527544, 'val_f1_score': 0.7007575757575758}

    Each F1: [0.6666666666666666, 0.5000000000000001, 0.8181818181818182, 0.8181818181818182]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.8589211518180739
    Each F1: [0.8505747126436781, 0.782122905027933, 0.9252577319587629, 0.8777292576419214]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.717752129627914, 'val_f1_score': 0.6899467504972092}

    Each F1: [0.6909090909090909, 0.46153846153846156, 0.8073394495412844, 0.8]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9038378574233966
    Each F1: [0.8802992518703243, 0.8795180722891566, 0.9523809523809523, 0.9031531531531533]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8440485265519884, 'val_f1_score': 0.744065294371548}

    Each F1: [0.7704918032786885, 0.5833333333333334, 0.8256880733944955, 0.7967479674796747]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9446328920616818
    Each F1: [0.9255583126550869, 0.9325153374233128, 0.9811320754716981, 0.9393258426966292]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8322546608233578, 'val_f1_score': 0.6998562484027601}

    Each F1: [0.7047619047619048, 0.5000000000000001, 0.7884615384615385, 0.8062015503875969]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9692158801978188
    Each F1: [0.9621489621489621, 0.9685534591194969, 0.9822784810126581, 0.963882618510158]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.9049902085273985, 'val_f1_score': 0.7060827742425271}

    Each F1: [0.6990291262135923, 0.4999999999999999, 0.8099173553719009, 0.8153846153846153]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.983084703763927
    Each F1: [0.9717791411042944, 0.9871794871794872, 0.9937421777221527, 0.9796380090497737]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8986256576719738, 'val_f1_score': 0.6871204245574498}

    Each F1: [0.68, 0.4848484848484849, 0.7868852459016393, 0.7967479674796747]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9879810234331707
    Each F1: [0.9779411764705881, 0.9935483870967742, 0.9962358845671268, 0.9841986455981941]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1143263474343315, 'val_f1_score': 0.6932443131348242}

    Each F1: [0.7037037037037037, 0.5, 0.7809523809523811, 0.7883211678832117]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9889103280065678
    Each F1: [0.9877450980392157, 0.9746835443037974, 1.0, 0.9932126696832579]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.0673478460185741, 'val_f1_score': 0.7033754786318196}

    Each F1: [0.7394957983193277, 0.4666666666666667, 0.8073394495412844, 0.8]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9945888348008151
    Each F1: [0.9926650366748165, 1.0, 0.992462311557789, 0.9932279909706545]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.160920142181336, 'val_f1_score': 0.7131495175577349}

    Each F1: [0.75, 0.4827586206896552, 0.8073394495412844, 0.8125]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.997586736241632
    Each F1: [0.9951219512195122, 1.0, 0.9974874371859297, 0.997737556561086]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1991183940695707, 'val_f1_score': 0.7180255244915761}

    Each F1: [0.7358490566037735, 0.5185185185185185, 0.8148148148148149, 0.8029197080291971]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9993810734086769
    Each F1: [0.9987789987789988, 1.0, 0.9987452948557088, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1863620126688923, 'val_f1_score': 0.7106805466180467}

    Each F1: [0.7321428571428572, 0.5185185185185185, 0.8108108108108109, 0.7812500000000001]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9944961746971639
    Each F1: [0.9926829268292683, 0.9934640522875817, 0.9974874371859297, 0.9943502824858758]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.2893730067702198, 'val_f1_score': 0.7069013345329134}

    Each F1: [0.7543859649122806, 0.4615384615384615, 0.7962962962962964, 0.8153846153846153]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 1.0
    Each F1: [1.0, 1.0, 1.0, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.18301749260968, 'val_f1_score': 0.7221687809572808}

    Each F1: [0.7543859649122806, 0.4827586206896552, 0.8318584070796461, 0.819672131147541]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 1.0
    Each F1: [1.0, 1.0, 1.0, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.23699593827838, 'val_f1_score': 0.7345240923222576}

    Each F1: [0.7706422018348623, 0.5384615384615385, 0.8108108108108109, 0.8181818181818182]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9994122638190435
    Each F1: [0.9987789987789988, 1.0, 1.0, 0.9988700564971752]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.240201734676563, 'val_f1_score': 0.7347277722277721}

    Each F1: [0.7636363636363636, 0.5384615384615385, 0.8214285714285714, 0.8153846153846153]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9987621031989214
    Each F1: [0.9975609756097561, 1.0, 0.9974874371859297, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.2493675896730372, 'val_f1_score': 0.730517254563056}

    Each F1: [0.7636363636363636, 0.5384615384615385, 0.8108108108108109, 0.8091603053435115]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9980818464689433
    Each F1: [0.9987789987789988, 0.9935483870967742, 1.0, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.2503408641411513, 'val_f1_score': 0.730517254563056}

    Each F1: [0.7636363636363636, 0.5384615384615385, 0.8108108108108109, 0.8091603053435115]

# ==== start fold6 ==== #



Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.38670749745353583
    Each F1: [0.35028248587570626, 0.17120622568093383, 0.5156794425087108, 0.5096618357487923]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8972463251421692, 'val_f1_score': 0.6440359477124183}

    Each F1: [0.5555555555555556, 0.4705882352941177, 0.7999999999999999, 0.7500000000000001]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.5301089138647419
    Each F1: [0.49045521292217326, 0.2814070351758794, 0.6717226435536295, 0.6768507638072855]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1871176232736578, 'val_f1_score': 0.40519531739066805}

    Each F1: [0.3037974683544304, 0.10526315789473685, 0.5842696629213482, 0.6274509803921569]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.6860687298246575
    Each F1: [0.6386333771353483, 0.52, 0.8198757763975156, 0.7657657657657658]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.7588425650798455, 'val_f1_score': 0.6830430575713595}

    Each F1: [0.6603773584905661, 0.4, 0.8717948717948718, 0.8]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.7485696510251048
    Each F1: [0.7171314741035858, 0.608294930875576, 0.866161616161616, 0.8026905829596412]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8196595503539635, 'val_f1_score': 0.6875462268865566}

    Each F1: [0.62, 0.4999999999999999, 0.8620689655172413, 0.7681159420289856]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.8257499705248539
    Each F1: [0.7896103896103897, 0.7459459459459459, 0.9246487867177523, 0.8427947598253275]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.8771998402933595, 'val_f1_score': 0.6986165697048049}

    Each F1: [0.673076923076923, 0.4666666666666667, 0.8479999999999999, 0.8067226890756302]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.8816492391311337
    Each F1: [0.8531289910600256, 0.8474576271186441, 0.9398496240601504, 0.8861607142857144]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 0.9004066220666996, 'val_f1_score': 0.6792182707988171}

    Each F1: [0.7079646017699116, 0.33333333333333337, 0.8688524590163934, 0.8067226890756302]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9221500128161313
    Each F1: [0.9015151515151515, 0.9047619047619049, 0.9649122807017544, 0.9174107142857143]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.0117212597024503, 'val_f1_score': 0.6769157221167179}

    Each F1: [0.6942148760330579, 0.36363636363636365, 0.8739495798319329, 0.7758620689655172]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9385105605932299
    Each F1: [0.9249692496924969, 0.9024390243902438, 0.9811794228356336, 0.9454545454545454]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1517653298125696, 'val_f1_score': 0.6931160908384586}

    Each F1: [0.6774193548387097, 0.45454545454545453, 0.8947368421052632, 0.7457627118644068]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9587120066842775
    Each F1: [0.9501246882793017, 0.9325153374233128, 0.983810709838107, 0.9683972911963882]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.1223508438105305, 'val_f1_score': 0.7158375668749719}

    Each F1: [0.7068965517241379, 0.4615384615384615, 0.8813559322033898, 0.8135593220338982]

 [ save model weight ] 



  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.982288243555121
    Each F1: [0.9767441860465117, 0.9746835443037974, 0.9924812030075189, 0.985244040862656]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.3990301114541512, 'val_f1_score': 0.6978797128949756}

    Each F1: [0.7096774193548386, 0.43478260869565216, 0.8571428571428571, 0.7899159663865546]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9798442428820713
    Each F1: [0.9790897908979089, 0.9625, 0.9924433249370276, 0.9853438556933484]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.4422851737844882, 'val_f1_score': 0.6996019408328622}

    Each F1: [0.7166666666666668, 0.39999999999999997, 0.8620689655172413, 0.819672131147541]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9880059155914905
    Each F1: [0.9817295980511571, 0.9935483870967742, 0.9936948297604035, 0.9830508474576272]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.451786584955044, 'val_f1_score': 0.6871277156331086}

    Each F1: [0.6949152542372882, 0.380952380952381, 0.8760330578512396, 0.7966101694915254]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9854480889867812
    Each F1: [0.9817295980511571, 0.980891719745223, 0.9962358845671268, 0.9829351535836177]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.5402631425352955, 'val_f1_score': 0.6918390647050671}

    Each F1: [0.717948717948718, 0.380952380952381, 0.8495575221238938, 0.8188976377952756]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9944104548714174
    Each F1: [0.9951100244498777, 0.9871794871794872, 0.9987421383647799, 0.9966101694915254]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.631870426828899, 'val_f1_score': 0.67339967737468}

    Each F1: [0.6885245901639345, 0.36363636363636365, 0.8448275862068966, 0.7966101694915254]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.994330856119549
    Each F1: [0.9963369963369964, 0.987012987012987, 0.9962358845671268, 0.997737556561086]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.6174220609917211, 'val_f1_score': 0.6922577505578502}

    Each F1: [0.7, 0.380952380952381, 0.8813559322033898, 0.8067226890756302]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9994123675930383
    Each F1: [0.9987819732034106, 1.0, 1.0, 0.9988674971687429]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.665748326235978, 'val_f1_score': 0.6797455556558406}

    Each F1: [0.6890756302521007, 0.380952380952381, 0.8620689655172413, 0.7868852459016393]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.997484895184432
    Each F1: [0.9987789987789988, 0.9935483870967742, 0.9987421383647799, 0.9988700564971752]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.7213001194454374, 'val_f1_score': 0.6881868131868132}

    Each F1: [0.7, 0.380952380952381, 0.8717948717948718, 0.8]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9968432051549251
    Each F1: [0.9963369963369964, 0.9935483870967742, 0.9974874371859297, 1.0]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.726852032557997, 'val_f1_score': 0.6816607235655733}

    Each F1: [0.6942148760330579, 0.36363636363636365, 0.8620689655172413, 0.8067226890756302]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9994123675930383
    Each F1: [0.9987819732034106, 1.0, 1.0, 0.9988674971687429]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.7273177015718328, 'val_f1_score': 0.677299676607977}

    Each F1: [0.7, 0.36363636363636365, 0.8521739130434783, 0.7933884297520662]


  0%|          | 0/42 [00:00<?, ?it/s]

Training score: 0.9988246330427105
    Each F1: [0.9975609756097561, 1.0, 1.0, 0.997737556561086]


  0%|          | 0/6 [00:00<?, ?it/s]

{'val_loss': 1.7253467796971558, 'val_f1_score': 0.6881868131868132}

    Each F1: [0.7, 0.380952380952381, 0.8717948717948718, 0.8]

# ==== start fold7 ==== #



# Debug

In [None]:
train_path = './data/train.csv'
train_ds = get_train_data(train_path)

  0%|          | 0/1 [00:00<?, ?ba/s]

In [None]:
from torch.nn.functional import cross_entropy
def calculate_loss(logits, labels):

    pred_labels = np.argmax(logits, axis=-1)
    loss = cross_entropy(logits, labels, reduction="none")
    return pred_labels, loss

pred_labels, loss = calculate_loss(torch.tensor(oof_pred), torch.tensor(train_ds["jobflag"]))

In [None]:
df = pd.DataFrame(train_ds)[["id", "clean_description", "jobflag"]]
df["pred"] = pred_labels
df["loss"] = loss
df["fold"] = fold_num
df = df.sort_values("loss").reset_index(drop=True)
df

Unnamed: 0,id,clean_description,jobflag,pred,loss,fold
0,1432,Collaborate stakeholders throughout organizati...,0,0,0.743669,8
1,50,Support stakeholder analysis efforts change ma...,3,3,0.743670,1
2,106,Design develop authentication authorization so...,2,2,0.743670,1
3,3,"Work technical design, development, release de...",2,2,0.743670,1
4,1443,Research develop new methodologies demand fore...,0,0,0.743670,8
...,...,...,...,...,...,...
1511,85,Develop front-end interfaces facilitate usage ...,3,2,1.743659,1
1512,17,"Use AWS AI services (e.g., Personalize), ML pl...",0,1,1.743659,1
1513,1508,Define problems opportunities complex business...,1,0,1.743660,8
1514,325,Delivering individual Brand sales quotas. Deve...,0,3,1.743670,2


In [None]:
df.head(750)["jobflag"].value_counts()

2    306
3    219
0    198
1     27
Name: jobflag, dtype: int64

In [None]:
df.tail(750)["jobflag"].value_counts()

3    283
0    265
2    142
1     60
Name: jobflag, dtype: int64

In [None]:
df

Unnamed: 0,id,clean_description,jobflag,pred,loss,fold
0,1432,Collaborate stakeholders throughout organizati...,0,0,0.743669,8
1,50,Support stakeholder analysis efforts change ma...,3,3,0.743670,1
2,106,Design develop authentication authorization so...,2,2,0.743670,1
3,3,"Work technical design, development, release de...",2,2,0.743670,1
4,1443,Research develop new methodologies demand fore...,0,0,0.743670,8
...,...,...,...,...,...,...
1511,85,Develop front-end interfaces facilitate usage ...,3,2,1.743659,1
1512,17,"Use AWS AI services (e.g., Personalize), ML pl...",0,1,1.743659,1
1513,1508,Define problems opportunities complex business...,1,0,1.743660,8
1514,325,Delivering individual Brand sales quotas. Deve...,0,3,1.743670,2


In [None]:
job_flags = ['DS', 'ML', 'SE', 'CO']

In [None]:
from transformers import pipeline
tokenizer = AutoTokenizer.from_pretrained('facebook/mbart-large-50-one-to-many-mmt')
mbart_translator = pipeline('translation',
                            model='facebook/mbart-large-50-one-to-many-mmt',
                            src_lang='en_XX', tgt_lang='ja_XX', device=0)

Downloading tokenizer_config.json:   0%|          | 0.00/528 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

Downloading sentencepiece.bpe.model:   0%|          | 0.00/4.83M [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/717 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

In [None]:
# DS
print("\n=========== Head 20 ===========\n")
temp_df = df[df["jobflag"]==0].head(20).reset_index(drop=True)
for i in range(len(temp_df)):
    ex = temp_df.loc[i, :]
    print(f'pred={ex["pred"]}({job_flags[ex["pred"]]}) >> loss={ex["loss"]}')
    print(mbart_translator(ex["clean_description"])[0]["translation_text"])
    print(ex["clean_description"], "\n")
    # bt_desc = augs["bt_en_de"].augment(ex["clean_description"])[0]
    # print(mbart_translator(bt_desc)[0]["translation_text"])
    # print(bt_desc, "\n")

print("\n=========== Tail 30 ===========\n")
temp_df = df[df["jobflag"]==0].tail(30).reset_index(drop=True)
for i in range(len(temp_df)):
    ex = temp_df.loc[i, :]
    print(f'pred={ex["pred"]}({job_flags[ex["pred"]]}) >> loss={ex["loss"]}')
    print(mbart_translator(ex["clean_description"])[0]["translation_text"])
    print(ex["clean_description"], "\n")
    # bt_desc = augs["bt_en_de"].augment(ex["clean_description"])[0]
    # print(mbart_translator(bt_desc)[0]["translation_text"])
    # print(bt_desc, "\n")

In [None]:
# ML
temp_df = df[df["jobflag"]==1].reset_index(drop=True)
for i in range(len(temp_df)):
    ex = temp_df.loc[i, :]
    print(f'pred={ex["pred"]}({job_flags[ex["pred"]]}) >> loss={ex["loss"]}')
    print(mbart_translator(ex["clean_description"])[0]["translation_text"])
    print(ex["clean_description"], "\n")
    # bt_desc = augs["bt_en_de"].augment(ex["clean_description"])[0]
    # print(mbart_translator(bt_desc)[0]["translation_text"])
    # print(bt_desc, "\n")

In [None]:
# SE
print("\n=========== Head 20 ===========\n")
temp_df = df[df["jobflag"]==2].head(20).reset_index(drop=True)
for i in range(len(temp_df)):
    ex = temp_df.loc[i, :]
    print(f'pred={ex["pred"]}({job_flags[ex["pred"]]}) >> loss={ex["loss"]}')
    print(mbart_translator(ex["clean_description"])[0]["translation_text"])
    print(ex["clean_description"], "\n")
    # bt_desc = augs["bt_en_de"].augment(ex["clean_description"])[0]
    # print(mbart_translator(bt_desc)[0]["translation_text"])
    # print(bt_desc, "\n")

print("\n=========== Tail 30 ===========\n")
temp_df = df[df["jobflag"]==2].tail(30).reset_index(drop=True)
for i in range(len(temp_df)):
    ex = temp_df.loc[i, :]
    print(f'pred={ex["pred"]}({job_flags[ex["pred"]]}) >> loss={ex["loss"]}')
    print(mbart_translator(ex["clean_description"])[0]["translation_text"])
    print(ex["clean_description"], "\n")
    # bt_desc = augs["bt_en_de"].augment(ex["clean_description"])[0]
    # print(mbart_translator(bt_desc)[0]["translation_text"])
    # print(bt_desc, "\n")

In [None]:
# CO
print("\n=========== Head 20 ===========\n")
temp_df = df[df["jobflag"]==3].head(20).reset_index(drop=True)
for i in range(len(temp_df)):
    ex = temp_df.loc[i, :]
    print(f'pred={ex["pred"]}({job_flags[ex["pred"]]}) >> loss={ex["loss"]}')
    print(mbart_translator(ex["clean_description"])[0]["translation_text"])
    print(ex["clean_description"], "\n")
    # bt_desc = augs["bt_en_de"].augment(ex["clean_description"])[0]
    # print(mbart_translator(bt_desc)[0]["translation_text"])
    # print(bt_desc, "\n")

print("\n=========== Tail 30 ===========\n")
temp_df = df[df["jobflag"]==3].tail(30).reset_index(drop=True)
for i in range(len(temp_df)):
    ex = temp_df.loc[i, :]
    print(f'pred={ex["pred"]}({job_flags[ex["pred"]]}) >> loss={ex["loss"]}')
    print(mbart_translator(ex["clean_description"])[0]["translation_text"])
    print(ex["clean_description"], "\n")
    # bt_desc = augs["bt_en_de"].augment(ex["clean_description"])[0]
    # print(mbart_translator(bt_desc)[0]["translation_text"])
    # print(bt_desc, "\n")