# 概要
- SpanBERT
- Dropout
- AdamW
- CosineAnnealinglr
- window stride

## setup envirionment

In [None]:
import os

if os.environ.get("KAGGLE_KERNEL_RUN_TYPE") is None:
    ON_KAGGLE = False
else:
    ON_KAGGLE = True
if not ON_KAGGLE:
    import shutil
    from requests import get

    from google.colab import drive, files
    # mount Google Drive
    drive.mount("/content/drive")
    %cd drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/main/
    !pip install  -qq sentencepiece transformers torch==1.9.1 torchvision==0.10.1 torchAudio==0.9.1 torchtext==0.10.1
    for dirname, _, filenames in os.walk('/kaggle/input'):
        for filename in filenames:
            print(os.path.join(dirname, filename))

Mounted at /content/drive
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/main
[K     |████████████████████████████████| 1.2 MB 13.9 MB/s 
[K     |████████████████████████████████| 3.5 MB 87.1 MB/s 
[K     |████████████████████████████████| 831.4 MB 6.0 kB/s 
[K     |████████████████████████████████| 22.1 MB 77.5 MB/s 
[K     |████████████████████████████████| 1.9 MB 92.4 MB/s 
[K     |████████████████████████████████| 7.6 MB 89.0 MB/s 
[K     |████████████████████████████████| 6.8 MB 86.9 MB/s 
[K     |████████████████████████████████| 596 kB 93.1 MB/s 
[K     |████████████████████████████████| 67 kB 6.6 MB/s 
[K     |████████████████████████████████| 895 kB 88.8 MB/s 
[?25h

Config

In [None]:
class Config:
    name = 'nb014_v3'
    model_savename = 'longformer'
    
    if ON_KAGGLE:
        model_name = '../input/pt-longformer-base' # https://www.kaggle.com/kishalmandal/pt-longformer-base
        # base_dir = '/content/drive/MyDrive/petfinder'
        data_dir = '../input/feedback-prize-2021/'
        pre_data_dir = './preprocessed/'
        model_dir = '.'
        output_dir = '.'
    else:
        # customize for my own Google Colab Environment
        # model_name = 'SpanBERT/spanbert-base-cased'
        model_name = 'allenai/longformer-base-4096' # download from Internet
        base_dir = '/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/'
        data_dir = os.path.join(base_dir, 'input/feedback-prize-2021/')
        pre_data_dir = os.path.join(base_dir, 'data/preprocessed')
        model_dir = os.path.join(base_dir, f'model/{name}')
        output_dir = os.path.join(base_dir, f'output/{name}')

    is_debug = False
    load_texts = True
    n_epoch = 15 # not to exceed runtime limits on Kaggle
    n_fold = 5
    verbose_steps = 500
    random_seed = 71
    max_length = 1024
    train_batch_size = 8
    valid_batch_size = 8
    lr = 4e-6
    num_labels = 15
    label_subtokens = True
    output_hidden_states = True
    hidden_dropout_prob = 0.1
    layer_norm_eps = 1e-7
    add_pooling_layer = False
    max_grad_norm = 10
    verbose_steps = 500
    es_patience=1
    if is_debug:
        debug_sample = 1000
        verbose_steps = 16
        n_epoch = 1
        n_fold = 2

constants

In [None]:
IGNORE_INDEX = -100
NON_LABEL = -1
OUTPUT_LABELS = ['0', 'B-Lead', 'I-Lead', 'B-Position', 'I-Position', 'B-Claim', 'I-Claim', 'B-Counterclaim', 'I-Counterclaim', 
                 'B-Rebuttal', 'I-Rebuttal', 'B-Evidence', 'I-Evidence', 'B-Concluding Statement', 'I-Concluding Statement']
LABELS_TO_IDS = {v:k for k,v in enumerate(OUTPUT_LABELS)}
IDS_TO_LABELS = {k:v for k,v in enumerate(OUTPUT_LABELS)}

MIN_THRESH = {
    "I-Lead": 9,
    "I-Position": 5,
    "I-Evidence": 14,
    "I-Claim": 3,
    "I-Concluding Statement": 11,
    "I-Counterclaim": 6,
    "I-Rebuttal": 4,
}

PROB_THRESH = {
    "I-Lead": 0.7,
    "I-Position": 0.55,
    "I-Evidence": 0.65,
    "I-Claim": 0.55,
    "I-Concluding Statement": 0.7,
    "I-Counterclaim": 0.5,
    "I-Rebuttal": 0.55,
}

In [None]:
if not ON_KAGGLE:
    if not os.path.exists(Config.model_dir):
        os.makedirs(Config.model_dir, exist_ok=True)
    if not os.path.exists(Config.output_dir):
        os.makedirs(Config.output_dir, exist_ok=True)

### libraries

In [None]:
# if not ON_KAGGLE:
#     !pip install -qq transformers

In [None]:
# general
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set()
import random
from tqdm.notebook import tqdm
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import gc
from collections import defaultdict
# nlp
from sklearn.feature_extraction.text import CountVectorizer
import torch
import torch.nn as nn
from torch import optim
from transformers import (
    LongformerConfig, 
    LongformerModel, 
    LongformerTokenizerFast,
    AutoConfig,
    AutoModel, 
    AutoTokenizer,
    logging)
logging.set_verbosity_warning()
logging.set_verbosity_error()
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, GradScaler

import warnings
warnings.filterwarnings('ignore')

## preprocess
use corrected train.csv

https://www.kaggle.com/nbroad/corrected-train-csv-feedback-prize/notebook

In [None]:
if ON_KAGGLE:
    df_alltrain = pd.read_csv('../input/corrected-train-csv-feedback-prize/corrected_train.csv')
else:
    df_alltrain = pd.read_csv(f'{Config.data_dir}/corrected_train.csv')

In [None]:
def agg_essays(train_flg):
    folder = 'train' if train_flg else 'test'
    names, texts =[], []
    for f in tqdm(list(os.listdir(f'{Config.data_dir}/{folder}'))):
        names.append(f.replace('.txt', ''))
        texts.append(open(f'{Config.data_dir}/{folder}/' + f, 'r').read())
        df_texts = pd.DataFrame({'id': names, 'text': texts})

    df_texts['text_split'] = df_texts.text.str.split()
    print('Completed tokenizing texts.')
    return df_texts

In [None]:
def ner(df_texts, df_train):
    all_entities = []
    for _,  row in tqdm(df_texts.iterrows(), total=len(df_texts)):
        total = len(row['text_split'])
        entities = ['0'] * total

        for _, row2 in df_train[df_train['id'] == row['id']].iterrows():
            discourse = row2['discourse_type']
            list_ix = [int(x) for x in row2['predictionstring'].split(' ')]
            entities[list_ix[0]] = f'B-{discourse}'
            for k in list_ix[1:]: entities[k] = f'I-{discourse}'
        all_entities.append(entities)

    df_texts['entities'] = all_entities
    print('Completed mapping discourse to each token.')
    return df_texts

In [None]:
if not Config.load_texts:    
    def preprocess(df_train = None):
        if df_train is None:
            train_flg = False
        else:
            train_flg = True
        
        df_texts = agg_essays(train_flg)

        if train_flg:
            df_texts = ner(df_texts, df_train)
        return df_texts
    
    alltrain_texts = preprocess(df_alltrain)
    test_texts = preprocess()
    # alltrain_texts.to_pickle('../input/fb-data/alltrain_texts_correct.pkl')
    # test_texts.to_pickle('../input/fb-data/test_texts_correct.pkl')
else:
    alltrain_texts = pd.read_pickle('../input/fb-data/alltrain_texts_correct.pkl')
    test_texts = pd.read_pickle('../input/fb-data/test_texts_correct.pkl')

In [None]:
if Config.is_debug:
    alltrain_texts = alltrain_texts.sample(Config.debug_sample).reset_index(drop=True)
print(len(alltrain_texts))

15594


set seed & split train/test

In [None]:
def seed_everything(seed=Config.random_seed):
    #os.environ['PYTHONSEED'] = str(seed)
    np.random.seed(seed%(2**32-1))
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic=True
    torch.backends.cudnn.benchmark = False

seed_everything()
# device optimization
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(f'Using device: {device}')

Using device: cuda


In [None]:
def split_fold(df_train):
    ids = df_train['id'].unique()
    kf = KFold(n_splits=Config.n_fold, shuffle = True, random_state=Config.random_seed)
    for i_fold, (_, valid_index) in enumerate(kf.split(ids)):
        df_train.loc[valid_index,'fold'] = i_fold
    return df_train

alltrain_texts = split_fold(alltrain_texts)
alltrain_texts.head()

Unnamed: 0,id,text,text_split,entities,fold
0,F48EF80D2ED3,There are many programs in the world around yo...,"[There, are, many, programs, in, the, world, a...","[B-Lead, I-Lead, I-Lead, I-Lead, I-Lead, I-Lea...",0.0
1,F8FB4470A52F,"Dear Senator,\n\n""The Electoral College is a p...","[Dear, Senator,, ""The, Electoral, College, is,...","[0, 0, B-Lead, I-Lead, I-Lead, I-Lead, I-Lead,...",0.0
2,F176A8CF72BB,In my opinion i don't think that is fair. i th...,"[In, my, opinion, i, don't, think, that, is, f...","[B-Position, I-Position, I-Position, I-Positio...",4.0
3,EBDE7FC748A4,Unmasking the Face\n\nThe face on Mars was rea...,"[Unmasking, the, Face, The, face, on, Mars, wa...","[0, 0, 0, B-Position, I-Position, I-Position, ...",3.0
4,F6C40C564E5E,Luke think you should join the seagoing cowboy...,"[Luke, think, you, should, join, the, seagoing...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, B-Claim, I-Clai...",4.0


## dataset

In [None]:
class FeedbackPrizeDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_len, has_labels):
        self.len = len(dataframe)
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.has_labels = has_labels
    
    def __getitem__(self, index):
        text = self.data['text'][index]
        encoding = self.tokenizer(
            text.split(),
            is_split_into_words = True,
            padding = 'max_length',
            truncation = True,
            max_length = self.max_len
        )
        word_ids = encoding.word_ids()

        # targets
        if self.has_labels:
            word_labels = self.data.entities[index]
            prev_word_idx = None
            labels_ids = []
            for word_idx in word_ids:
                if word_idx is None:
                    labels_ids.append(IGNORE_INDEX)
                elif word_idx != prev_word_idx:
                    labels_ids.append(LABELS_TO_IDS[word_labels[word_idx]])
                else:
                    if Config.label_subtokens:
                        labels_ids.append(LABELS_TO_IDS[word_labels[word_idx]])
                    else:
                        labels_ids.append(IGNORE_INDEX)
                prev_word_idx = word_idx
            encoding['labels'] = labels_ids
        # convert to torch.tensor
        item = {k: torch.as_tensor(v) for k, v in encoding.items()}
        word_ids2 = [w if w is not None else NON_LABEL for w in word_ids]
        item['word_ids'] = torch.as_tensor(word_ids2)
        return item

    def __len__(self):
        return self.len

## model

In [None]:
class FeedbackModel(nn.Module):
    def __init__(self):
        super(FeedbackModel, self).__init__()
        model_config = LongformerConfig.from_pretrained(Config.model_name)
        self.model_config = model_config
        self.model = LongformerModel.from_pretrained(Config.model_name, config=model_config)
        self.head = nn.Linear(model_config.hidden_size, Config.num_labels)
        self.dropout = nn.Dropout(0.1)
    
    def forward(self, input_ids, mask):
        x = self.model(input_ids, mask)
        x = x[0]
        x = self.dropout(x)
        logits = self.head(x)
        return logits

## utility function

In [None]:
def active_logits(raw_logits, word_ids):
    word_ids = word_ids.view(-1)
    active_mask = word_ids.unsqueeze(1).expand(word_ids.shape[0], Config.num_labels)
    active_mask = active_mask != NON_LABEL
    active_logits = raw_logits.view(-1, Config.num_labels)
    active_logits = torch.masked_select(active_logits, active_mask) # return 1dTensor
    active_logits = active_logits.view(-1, Config.num_labels) 
    return active_logits

def active_labels(labels):
    active_mask = labels.view(-1) != IGNORE_INDEX
    active_labels = torch.masked_select(labels.view(-1), active_mask)
    return active_labels

def active_preds_prob(active_logits):
    active_preds = torch.argmax(active_logits, axis = 1)
    active_preds_prob, _ = torch.max(active_logits, axis = 1)
    return active_preds, active_preds_prob

## evaluating function

In [None]:
def calc_overlap(row):
    """
    calculate the overlap between prediction and ground truth
    """
    set_pred = set(row.new_predictionstring_pred.split(' '))
    set_gt = set(row.new_predictionstring_gt.split(' '))
    # length of each end intersection
    len_pred = len(set_pred)
    len_gt = len(set_gt)
    intersection = len(set_gt.intersection(set_pred))
    overlap_1 = intersection / len_gt
    overlap_2 = intersection / len_pred
    return [overlap_1, overlap_2]

def score_feedback_comp(pred_df, gt_df):
    """
    A function that scores for the kaggle
        Student Writing Competition
        
    Uses the steps in the evaluation page here:
        https://www.kaggle.com/c/feedback-prize-2021/overview/evaluation
    """
    gt_df = gt_df[['id', 'discourse_type', 'new_predictionstring']].reset_index(drop = True).copy()
    pred_df = pred_df[['id', 'class', 'new_predictionstring']].reset_index(drop = True).copy()
    gt_df['gt_id'] = gt_df.index
    pred_df['pred_id'] = pred_df.index
    joined = pred_df.merge(
        gt_df,
        left_on = ['id', 'class'],
        right_on = ['id', 'discourse_type'],
        how = 'outer',
        suffixes = ['_pred', '_gt']
    )
    joined['new_predictionstring_gt'] =  joined['new_predictionstring_gt'].fillna(' ')
    joined['new_predictionstring_pred'] =  joined['new_predictionstring_pred'].fillna(' ')
    joined['overlaps'] = joined.apply(calc_overlap, axis = 1)
    # overlap over 0.5: true positive
    # If nultiple overlaps exists, the higher is taken.
    joined['overlap1'] = joined['overlaps'].apply(lambda x: eval(str(x))[0])
    joined['overlap2'] = joined['overlaps'].apply(lambda x: eval(str(x))[1])

    joined['potential_TP'] = (joined['overlap1'] >= 0.5) & (joined['overlap2'] >= 0.5)
    joined['max_overlap'] = joined[['overlap1', 'overlap2']].max(axis = 1)
    tp_pred_ids = joined.query('potential_TP').sort_values('max_overlap', ascending = False)\
                  .groupby(['id', 'new_predictionstring_gt']).first()['pred_id'].values
    
    fp_pred_ids = [p for p in joined['pred_id'].unique() if p not in tp_pred_ids]
    matched_gt_ids = joined.query('potential_TP')['gt_id'].unique()
    unmatched_gt_ids = [c for c in joined['gt_id'].unique() if c not in matched_gt_ids]

    TP = len(tp_pred_ids)
    FP = len(fp_pred_ids)
    FN = len(unmatched_gt_ids)
    macro_f1_score = TP / (TP + 1/2 * (FP + FN))
    return macro_f1_score

def oof_score(df_val, oof):
    f1score = []
    classes = ['Lead', 'Position','Claim', 'Counterclaim', 'Rebuttal','Evidence','Concluding Statement']
    for c in classes:
        pred_df = oof.loc[oof['class'] == c].copy()
        gt_df = df_val.loc[df_val['discourse_type'] == c].copy()
        f1 = score_feedback_comp(pred_df, gt_df)
        print(f'{c:<10}: {f1:4f}')
        f1score.append(f1)
    f1avg = np.mean(f1score)
    return f1avg

## inferencing function

In [None]:
def inference(model, dl, criterion, valid_flg):
    final_predictions = []
    final_predictions_prob = []
    stream = tqdm(dl)
    model.eval()
    
    valid_loss = 0
    valid_accuracy = 0
    all_logits = None
    for batch_idx, batch in enumerate(stream, start = 1):
        ids = batch['input_ids'].to(device, dtype = torch.long)
        mask = batch['attention_mask'].to(device, dtype = torch.long)
        with torch.no_grad():
            raw_logits = model(input_ids=ids, mask = mask)
        del ids, mask
        
        word_ids = batch['word_ids'].to(device, dtype = torch.long)
        if valid_flg:    
            raw_labels = batch['labels'].to(device, dtype = torch.long)
            logits = active_logits(raw_logits, word_ids)
            labels = active_labels(raw_labels)
            preds, preds_prob = active_preds_prob(logits)
            valid_accuracy += accuracy_score(labels.cpu().numpy(), preds.cpu().numpy())
            loss = criterion(logits, labels)
            valid_loss += loss.item()
        
        if batch_idx == 1:
            all_logits = raw_logits.cpu().numpy()
        else:
            all_logits = np.append(all_logits, raw_logits.cpu().numpy(), axis=0)

    
    if valid_flg:        
        epoch_loss = valid_loss / batch_idx
        epoch_accuracy = valid_accuracy / batch_idx
    else:
        epoch_loss, epoch_accuracy = 0, 0
    return all_logits, epoch_loss, epoch_accuracy


def preds_class_prob(all_logits, dl):
    print("predict target class and its probabilty")
    final_predictions = []
    final_predictions_score = []
    stream = tqdm(dl)
    len_sample = all_logits.shape[0]

    for batch_idx, batch in enumerate(stream, start=0):
        for minibatch_idx in range(Config.valid_batch_size):
            sample_idx = int(batch_idx * Config.valid_batch_size + minibatch_idx)
            if sample_idx > len_sample - 1 : break
            word_ids = batch['word_ids'][minibatch_idx].numpy()
            predictions =[]
            predictions_prob = []
            pred_class_id = np.argmax(all_logits[sample_idx], axis=1)
            pred_score = np.max(all_logits[sample_idx], axis=1)
            pred_class_labels = [IDS_TO_LABELS[i] for i in pred_class_id]
            prev_word_idx = -1
            for idx, word_idx in enumerate(word_ids):
                if word_idx == -1:
                    pass
                elif word_idx != prev_word_idx:
                    predictions.append(pred_class_labels[idx])
                    predictions_prob.append(pred_score[idx])
                    prev_word_idx = word_idx
            final_predictions.append(predictions)
            final_predictions_score.append(predictions_prob)
    return final_predictions, final_predictions_score

In [None]:
def get_preds_onefold(model, df, dl, criterion, valid_flg):
    logits, valid_loss, valid_acc = inference(model, dl, criterion, valid_flg)
    all_preds, all_preds_prob = preds_class_prob(logits, dl)
    df_pred = post_process_pred(df, all_preds, all_preds_prob)
    return df_pred, valid_loss, valid_acc

def get_preds_folds(df, dl, criterion, valid_flg=False):
    for i_fold in range(Config.n_fold):
        model_filename = os.path.join(Config.model_dir, f"{Config.model_savename}_{i_fold}.bin")
        print(f"{model_filename} inference")
        model = FeedbackModel()
        model = model.to(device)
        model.load_state_dict(torch.load(model_filename))
        logits, valid_loss, valid_acc = inference(model, dl, criterion, valid_flg)
        if i_fold == 0:
            avg_pred_logits = logits
        else:
            avg_pred_logits += logits
    avg_pred_logits /= Config.n_fold
    all_preds, all_preds_prob = preds_class_prob(avg_pred_logits, dl)
    df_pred = post_process_pred(df, all_preds, all_preds_prob)
    return df_pred

def post_process_pred(df, all_preds, all_preds_prob):
    final_preds = []
    for i in range(len(df)):
        idx = df.id.values[i]
        pred = all_preds[i]
        pred_prob = all_preds_prob[i]
        j = 0
        while j < len(pred):
            cls = pred[j]
            if cls == '0': j += 1
            else: cls = cls.replace('B', 'I')
            end = j + 1
            while end < len(pred) and pred[end] == cls:
                end += 1
            if cls != '0' and cls !='':
                avg_score = np.mean(pred_prob[j:end])
                if end - j > MIN_THRESH[cls] and avg_score > PROB_THRESH[cls]:
                    final_preds.append((idx, cls.replace('I-', ''), ' '.join(map(str, list(range(j, end))))))
            j = end
    df_pred = pd.DataFrame(final_preds)
    df_pred.columns = ['id', 'class', 'new_predictionstring']
    return df_pred

## training and validating function

In [None]:
def train_fn(model, dl_train, optimizer, epoch, criterion, scheduler):
    model.train()
    train_loss = 0
    train_accuracy = 0
    stream = tqdm(dl_train)
    scaler = GradScaler()

    for batch_idx, batch in enumerate(stream, start = 1):
        ids = batch['input_ids'].to(device, dtype = torch.long)
        mask = batch['attention_mask'].to(device, dtype = torch.long)
        raw_labels = batch['labels'].to(device, dtype = torch.long)
        word_ids = batch['word_ids'].to(device, dtype = torch.long)
        optimizer.zero_grad()
        with autocast():
            raw_logits = model(input_ids = ids, mask = mask)
        
        logits = active_logits(raw_logits, word_ids)
        labels = active_labels(raw_labels)
        preds, preds_prob = active_preds_prob(logits)
        train_accuracy += accuracy_score(labels.cpu().numpy(), preds.cpu().numpy())
        criterion = nn.CrossEntropyLoss()
        loss = criterion(logits, labels)

        scaler.scale(loss).backward()
        # loss.backward()
        scaler.step(optimizer)
        scaler.update()
        # torch.nn.utils.clip_grad_norm_(
            # parameters=model.parameters(), max_norm=Config.max_grad_norm
        # )
        # optimizer.step()
        scheduler.step()
        train_loss += loss.item()

        if batch_idx % Config.verbose_steps == 0:
            loss_step = train_loss / batch_idx
            print(f'Epoch {epoch}/{Config.n_epoch} | {batch_idx:04d} steps: {loss_step}')

    epoch_loss = train_loss / batch_idx
    epoch_accuracy = train_accuracy / batch_idx
    del dl_train, raw_logits, logits, raw_labels, preds, labels
    torch.cuda.empty_cache()
    gc.collect()
    print(f'epoch {epoch} - training loss: {epoch_loss:.4f}')
    print(f'epoch {epoch} - training accuracy: {epoch_accuracy:.4f}')
    return epoch_loss

In [None]:
def valid_fn(model, df_val, df_val_eval, dl_val, epoch, criterion):
    oof, valid_loss, valid_acc  = get_preds_onefold(model, df_val, dl_val, criterion, valid_flg=True)
    f1score =[]
    # classes = oof['class'].unique()
    classes = ['Lead', 'Position', 'Claim','Counterclaim', 'Rebuttal','Evidence','Concluding Statement']
    print(f"Validation F1 scores")

    for c in classes:
        pred_df = oof.loc[oof['class'] == c].copy()
        gt_df = df_val_eval.loc[df_val_eval['discourse_type'] == c].copy()
        f1 = score_feedback_comp(pred_df, gt_df)
        print(f' * {c:<10}: {f1:4f}')
        f1score.append(f1)
    f1avg = np.mean(f1score)
    print(f'Overall Validation avg F1: {f1avg:.4f} val_loss:{valid_loss:.4f} val_accuracy:{valid_acc:.4f}')
    return valid_loss, oof, f1avg

# Early Stopping

In [None]:
class EarlyStopping:
    """earlystoppingクラス"""

    def __init__(self, patience=5, verbose=False, direction='max'):
        """引数：最小値の非更新数カウンタ、表示設定、モデル格納path"""

        self.patience = patience    #設定ストップカウンタ
        self.verbose = verbose      #表示の有無
        self.counter = 0            #現在のカウンタ値
        self.best_score = None      #ベストスコア
        self.early_stop = False     #ストップフラグ
        self.direction = direction
        if self.direction == 'max':
            self.val_loss_init = -np.Inf
        elif self.direction == 'min':
            self.val_loss_init = np.Inf

    def __call__(self, val_loss, model):
        """
        特殊(call)メソッド
        実際に学習ループ内で最小lossを更新したか否かを計算させる部分
        """
        if self.direction == 'max':
            self.score = val_loss
        elif self.direction == 'min':
            self.score = -val_loss

        if self.best_score is None:  #1Epoch目の処理
            self.best_score = self.score   #1Epoch目はそのままベストスコアとして記録する
            self.checkpoint(val_loss, model)  #記録後にモデルを保存してスコア表示する
        elif self.score < self.best_score:  # ベストスコアを更新できなかった場合
            self.counter += 1   #ストップカウンタを+1
            if self.verbose:  #表示を有効にした場合は経過を表示
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')  #現在のカウンタを表示する 
            if self.counter >= self.patience:  #設定カウントを上回ったらストップフラグをTrueに変更
                self.early_stop = True
        else:  #ベストスコアを更新した場合
            self.best_score = self.score  #ベストスコアを上書き
            self.checkpoint(val_loss, model)  #モデルを保存してスコア表示
            self.counter = 0  #ストップカウンタリセット

    def checkpoint(self, val_loss, model):
        '''ベストスコア更新時に実行されるチェックポイント関数'''
        if self.verbose:  #表示を有効にした場合は、前回のベストスコアからどれだけ更新したか？を表示
            if self.direction == 'max':
                print(f'Validation loss increased ({self.val_loss_init:.6f} --> {val_loss:.6f}).  Saving model ...')
            else:
                print(f'Validation loss decreased ({self.val_loss_init:.6f} --> {val_loss:.6f}).  Saving model ...')
        self.val_loss_init = val_loss  #その時のlossを記録する

# Training loop



In [None]:
start_time = time.time()

oof = pd.DataFrame()
for i_fold in range(Config.n_fold):
    print('='*50, f'Fold{i_fold} training', '='*50)
    tokenizer = LongformerTokenizerFast.from_pretrained(Config.model_name, add_prefix_space = True)
    model = FeedbackModel()
    model = model.to(device)
    optimizer = torch.optim.AdamW(params=model.parameters(), lr=Config.lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)
    
    df_train = alltrain_texts[alltrain_texts['fold'] != i_fold].reset_index(drop = True)
    ds_train = FeedbackPrizeDataset(df_train, tokenizer, Config.max_length, True)
    df_val = alltrain_texts[alltrain_texts['fold'] == i_fold].reset_index(drop = True)
    val_idlist = df_val['id'].unique().tolist()
    df_val_eval = df_alltrain.query('id==@val_idlist').reset_index(drop=True)
    ds_val = FeedbackPrizeDataset(df_val, tokenizer, Config.max_length, True)
    dl_train = DataLoader(ds_train, batch_size=Config.train_batch_size, shuffle=True, num_workers=2, pin_memory=True)
    dl_val = DataLoader(ds_val, batch_size=Config.valid_batch_size, shuffle=False, num_workers=2, pin_memory=True)

    best_val_loss = np.inf
    criterion = nn.CrossEntropyLoss()

    train_loss_history = []
    valid_loss_history = []
    valid_f1_history = []

    # Early Stopping
    model_filename = f'{Config.model_dir}/{Config.model_savename}_{i_fold}.bin'
    earlystopping = EarlyStopping(patience=Config.es_patience, verbose=True, direction='max') 

    for epoch in range(1, Config.n_epoch + 1):
        print('-'*30, f'Epoch{epoch}', '-'*30)
        # Training
        train_loss = train_fn(model, dl_train, optimizer, epoch, criterion, scheduler) # train
        train_loss_history.append(train_loss) # train lossの保存

        # Validation
        valid_loss, _oof, val_f1avg = valid_fn(model, df_val, df_val_eval, dl_val, epoch, criterion) # validation
        valid_loss_history.append(valid_loss) # valid lossの保存
        valid_f1_history.append(val_f1avg) # valid f1の保存

        if valid_loss < best_val_loss:
            best_val_loss = valid_loss
            _oof_fold_best = _oof
            _oof_fold_best['fold'] = i_fold
            # validation lossを更新したらモデルを保存する
            torch.save(model.state_dict(), model_filename)
            print(f'{model_filename} saved')

        # early stoppingに引っ掛かったらmodelを保存する
        earlystopping(val_f1avg, model) # callメソッド呼び出し
        if earlystopping.early_stop: #ストップフラグがTrueの場合、breakでforループを抜ける
            print("Early Stopping!")
            break
    print(f'Fold{i_fold} best f1 score: {np.max(valid_f1_history)}')

    # lossの描画
    fig, ax = plt.subplots(1, 1, figsize=(10,6))
    sns.lineplot(data=train_loss_history, label='train loss')
    sns.lineplot(data=valid_loss_history, label='valid loss')
    ax.set_title(f'loss history: fold{i_fold}')
    plt.legend();

    oof = pd.concat([oof, _oof_fold_best])
    del df_train, ds_train, df_val, val_idlist, df_val_eval, ds_val, dl_train, dl_val, tokenizer, model, optimizer
    gc.collect()
    torch.cuda.empty_cache()

oof.to_csv(f'{Config.output_dir}/oof_{Config.name}.csv', index=False)
print('elapsed time:', f'{time.time() - start_time:.1f}s')



Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/694 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570M [00:00<?, ?B/s]

------------------------------ Epoch1 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 1/15 | 0500 steps: 1.4483173828125
Epoch 1/15 | 1000 steps: 1.21328515625
Epoch 1/15 | 1500 steps: 1.1054401041666666
epoch 1 - training loss: 1.0951
epoch 1 - training accuracy: 0.6697


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.666667
 * Position  : 0.496162
 * Claim     : 0.353610
 * Counterclaim: 0.212824
 * Rebuttal  : 0.035398
 * Evidence  : 0.582135
 * Concluding Statement: 0.740767
Overall Validation avg F1: 0.4411 val_loss:0.8271 val_accuracy:0.7347
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_0.bin saved
Validation loss increased (-inf --> 0.441080).  Saving model ...
------------------------------ Epoch2 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 2/15 | 0500 steps: 0.81584326171875
Epoch 2/15 | 1000 steps: 0.808825927734375
Epoch 2/15 | 1500 steps: 0.79665966796875
epoch 2 - training loss: 0.7948
epoch 2 - training accuracy: 0.7425


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.712644
 * Position  : 0.551735
 * Claim     : 0.418916
 * Counterclaim: 0.344981
 * Rebuttal  : 0.211139
 * Evidence  : 0.628137
 * Concluding Statement: 0.754657
Overall Validation avg F1: 0.5175 val_loss:0.7444 val_accuracy:0.7552
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_0.bin saved
Validation loss increased (0.441080 --> 0.517458).  Saving model ...
------------------------------ Epoch3 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 3/15 | 0500 steps: 0.74566015625
Epoch 3/15 | 1000 steps: 0.737950439453125
Epoch 3/15 | 1500 steps: 0.7310174153645833
epoch 3 - training loss: 0.7303
epoch 3 - training accuracy: 0.7590


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.714356
 * Position  : 0.581950
 * Claim     : 0.483920
 * Counterclaim: 0.401966
 * Rebuttal  : 0.278014
 * Evidence  : 0.642842
 * Concluding Statement: 0.745191
Overall Validation avg F1: 0.5497 val_loss:0.7127 val_accuracy:0.7626
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_0.bin saved
Validation loss increased (0.517458 --> 0.549748).  Saving model ...
------------------------------ Epoch4 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 4/15 | 0500 steps: 0.70177587890625
Epoch 4/15 | 1000 steps: 0.6931484375
Epoch 4/15 | 1500 steps: 0.6890888671875
epoch 4 - training loss: 0.6888
epoch 4 - training accuracy: 0.7708


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.732884
 * Position  : 0.598195
 * Claim     : 0.506736
 * Counterclaim: 0.436720
 * Rebuttal  : 0.320972
 * Evidence  : 0.644979
 * Concluding Statement: 0.732537
Overall Validation avg F1: 0.5676 val_loss:0.6978 val_accuracy:0.7650
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_0.bin saved
Validation loss increased (0.549748 --> 0.567575).  Saving model ...
------------------------------ Epoch5 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 5/15 | 0500 steps: 0.6676474609375
Epoch 5/15 | 1000 steps: 0.662588623046875
Epoch 5/15 | 1500 steps: 0.6613688151041667
epoch 5 - training loss: 0.6632
epoch 5 - training accuracy: 0.7770


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.743186
 * Position  : 0.603959
 * Claim     : 0.508867
 * Counterclaim: 0.446340
 * Rebuttal  : 0.325882
 * Evidence  : 0.638616
 * Concluding Statement: 0.750213
Overall Validation avg F1: 0.5739 val_loss:0.6708 val_accuracy:0.7765
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_0.bin saved
Validation loss increased (0.567575 --> 0.573866).  Saving model ...
------------------------------ Epoch6 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 6/15 | 0500 steps: 0.648205078125
Epoch 6/15 | 1000 steps: 0.64167529296875
Epoch 6/15 | 1500 steps: 0.6393701171875
epoch 6 - training loss: 0.6395
epoch 6 - training accuracy: 0.7835


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.747181
 * Position  : 0.612972
 * Claim     : 0.524943
 * Counterclaim: 0.439982
 * Rebuttal  : 0.348891
 * Evidence  : 0.653788
 * Concluding Statement: 0.739852
Overall Validation avg F1: 0.5811 val_loss:0.6675 val_accuracy:0.7726
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_0.bin saved
Validation loss increased (0.573866 --> 0.581087).  Saving model ...
------------------------------ Epoch7 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 7/15 | 0500 steps: 0.6241015625
Epoch 7/15 | 1000 steps: 0.620136474609375
Epoch 7/15 | 1500 steps: 0.6216316731770833
epoch 7 - training loss: 0.6206
epoch 7 - training accuracy: 0.7892


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.743541
 * Position  : 0.609820
 * Claim     : 0.530814
 * Counterclaim: 0.456364
 * Rebuttal  : 0.319489
 * Evidence  : 0.654604
 * Concluding Statement: 0.770560
Overall Validation avg F1: 0.5836 val_loss:0.6589 val_accuracy:0.7777
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_0.bin saved
Validation loss increased (0.581087 --> 0.583599).  Saving model ...
------------------------------ Epoch8 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 8/15 | 0500 steps: 0.60252587890625
Epoch 8/15 | 1000 steps: 0.60429052734375
Epoch 8/15 | 1500 steps: 0.6016220703125
epoch 8 - training loss: 0.6030
epoch 8 - training accuracy: 0.7945


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.721311
 * Position  : 0.597116
 * Claim     : 0.517816
 * Counterclaim: 0.446488
 * Rebuttal  : 0.327887
 * Evidence  : 0.653480
 * Concluding Statement: 0.756496
Overall Validation avg F1: 0.5744 val_loss:0.6708 val_accuracy:0.7751
EarlyStopping counter: 1 out of 1
Early Stopping!
Fold0 best f1 score: 0.5835986406042247
------------------------------ Epoch1 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 1/15 | 0500 steps: 1.4189130859375
Epoch 1/15 | 1000 steps: 1.2010244140625
Epoch 1/15 | 1500 steps: 1.096251953125
epoch 1 - training loss: 1.0885
epoch 1 - training accuracy: 0.6736


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.701445
 * Position  : 0.520729
 * Claim     : 0.343300
 * Counterclaim: 0.264009
 * Rebuttal  : 0.006466
 * Evidence  : 0.603434
 * Concluding Statement: 0.736626
Overall Validation avg F1: 0.4537 val_loss:0.8227 val_accuracy:0.7322
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_1.bin saved
Validation loss increased (-inf --> 0.453715).  Saving model ...
------------------------------ Epoch2 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 2/15 | 0500 steps: 0.8153583984375
Epoch 2/15 | 1000 steps: 0.804488037109375
Epoch 2/15 | 1500 steps: 0.7901998697916667
epoch 2 - training loss: 0.7885
epoch 2 - training accuracy: 0.7456


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.712932
 * Position  : 0.569892
 * Claim     : 0.366527
 * Counterclaim: 0.368196
 * Rebuttal  : 0.216049
 * Evidence  : 0.622076
 * Concluding Statement: 0.740020
Overall Validation avg F1: 0.5137 val_loss:0.7568 val_accuracy:0.7536
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_1.bin saved
Validation loss increased (0.453715 --> 0.513671).  Saving model ...
------------------------------ Epoch3 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 3/15 | 0500 steps: 0.73897119140625
Epoch 3/15 | 1000 steps: 0.732017578125
Epoch 3/15 | 1500 steps: 0.7233662109375
epoch 3 - training loss: 0.7229
epoch 3 - training accuracy: 0.7619


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.732606
 * Position  : 0.591231
 * Claim     : 0.453822
 * Counterclaim: 0.412827
 * Rebuttal  : 0.275387
 * Evidence  : 0.623558
 * Concluding Statement: 0.747066
Overall Validation avg F1: 0.5481 val_loss:0.7088 val_accuracy:0.7607
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_1.bin saved
Validation loss increased (0.513671 --> 0.548071).  Saving model ...
------------------------------ Epoch4 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 4/15 | 0500 steps: 0.6945673828125
Epoch 4/15 | 1000 steps: 0.689139404296875
Epoch 4/15 | 1500 steps: 0.68654296875
epoch 4 - training loss: 0.6856
epoch 4 - training accuracy: 0.7720


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.706697
 * Position  : 0.593760
 * Claim     : 0.480946
 * Counterclaim: 0.429739
 * Rebuttal  : 0.305497
 * Evidence  : 0.628398
 * Concluding Statement: 0.737034
Overall Validation avg F1: 0.5546 val_loss:0.7156 val_accuracy:0.7582
Validation loss increased (0.548071 --> 0.554582).  Saving model ...
------------------------------ Epoch5 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 5/15 | 0500 steps: 0.66119677734375
Epoch 5/15 | 1000 steps: 0.663470947265625
Epoch 5/15 | 1500 steps: 0.6618395182291666
epoch 5 - training loss: 0.6606
epoch 5 - training accuracy: 0.7780


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.761613
 * Position  : 0.622333
 * Claim     : 0.493250
 * Counterclaim: 0.433110
 * Rebuttal  : 0.308285
 * Evidence  : 0.623495
 * Concluding Statement: 0.756626
Overall Validation avg F1: 0.5712 val_loss:0.6788 val_accuracy:0.7746
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_1.bin saved
Validation loss increased (0.554582 --> 0.571244).  Saving model ...
------------------------------ Epoch6 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 6/15 | 0500 steps: 0.64806787109375
Epoch 6/15 | 1000 steps: 0.6437001953125
Epoch 6/15 | 1500 steps: 0.639134765625
epoch 6 - training loss: 0.6394
epoch 6 - training accuracy: 0.7846


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.737827
 * Position  : 0.615822
 * Claim     : 0.497285
 * Counterclaim: 0.434985
 * Rebuttal  : 0.323013
 * Evidence  : 0.626040
 * Concluding Statement: 0.756812
Overall Validation avg F1: 0.5703 val_loss:0.6761 val_accuracy:0.7716
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_1.bin saved
EarlyStopping counter: 1 out of 1
Early Stopping!
Fold1 best f1 score: 0.5712443836386821
------------------------------ Epoch1 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 1/15 | 0500 steps: 1.45523828125
Epoch 1/15 | 1000 steps: 1.2241474609375
Epoch 1/15 | 1500 steps: 1.1184117838541667
epoch 1 - training loss: 1.1088
epoch 1 - training accuracy: 0.6646


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.675741
 * Position  : 0.519305
 * Claim     : 0.350714
 * Counterclaim: 0.229899
 * Rebuttal  : 0.007042
 * Evidence  : 0.589215
 * Concluding Statement: 0.743620
Overall Validation avg F1: 0.4451 val_loss:0.8175 val_accuracy:0.7367
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_2.bin saved
Validation loss increased (-inf --> 0.445077).  Saving model ...
------------------------------ Epoch2 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 2/15 | 0500 steps: 0.82455859375
Epoch 2/15 | 1000 steps: 0.80964599609375
Epoch 2/15 | 1500 steps: 0.80408642578125
epoch 2 - training loss: 0.8029
epoch 2 - training accuracy: 0.7397


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.692254
 * Position  : 0.571333
 * Claim     : 0.427287
 * Counterclaim: 0.300913
 * Rebuttal  : 0.096610
 * Evidence  : 0.605929
 * Concluding Statement: 0.749380
Overall Validation avg F1: 0.4920 val_loss:0.7586 val_accuracy:0.7497
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_2.bin saved
Validation loss increased (0.445077 --> 0.491958).  Saving model ...
------------------------------ Epoch3 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 3/15 | 0500 steps: 0.74501123046875
Epoch 3/15 | 1000 steps: 0.73852490234375
Epoch 3/15 | 1500 steps: 0.7377364908854167
epoch 3 - training loss: 0.7364
epoch 3 - training accuracy: 0.7572


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.722351
 * Position  : 0.607890
 * Claim     : 0.477360
 * Counterclaim: 0.372910
 * Rebuttal  : 0.220253
 * Evidence  : 0.622360
 * Concluding Statement: 0.752639
Overall Validation avg F1: 0.5394 val_loss:0.7032 val_accuracy:0.7664
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_2.bin saved
Validation loss increased (0.491958 --> 0.539395).  Saving model ...
------------------------------ Epoch4 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 4/15 | 0500 steps: 0.70283056640625
Epoch 4/15 | 1000 steps: 0.7006572265625
Epoch 4/15 | 1500 steps: 0.6977864583333333
epoch 4 - training loss: 0.6974
epoch 4 - training accuracy: 0.7680


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.730732
 * Position  : 0.621250
 * Claim     : 0.490586
 * Counterclaim: 0.383367
 * Rebuttal  : 0.245783
 * Evidence  : 0.628578
 * Concluding Statement: 0.757123
Overall Validation avg F1: 0.5511 val_loss:0.6830 val_accuracy:0.7726
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_2.bin saved
Validation loss increased (0.539395 --> 0.551060).  Saving model ...
------------------------------ Epoch5 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 5/15 | 0500 steps: 0.67375830078125
Epoch 5/15 | 1000 steps: 0.66828466796875
Epoch 5/15 | 1500 steps: 0.6677545572916667
epoch 5 - training loss: 0.6677
epoch 5 - training accuracy: 0.7755


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.738447
 * Position  : 0.620975
 * Claim     : 0.486498
 * Counterclaim: 0.395506
 * Rebuttal  : 0.293314
 * Evidence  : 0.631568
 * Concluding Statement: 0.752170
Overall Validation avg F1: 0.5598 val_loss:0.6754 val_accuracy:0.7762
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_2.bin saved
Validation loss increased (0.551060 --> 0.559783).  Saving model ...
------------------------------ Epoch6 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 6/15 | 0500 steps: 0.6451494140625
Epoch 6/15 | 1000 steps: 0.648826171875
Epoch 6/15 | 1500 steps: 0.6468671875
epoch 6 - training loss: 0.6470
epoch 6 - training accuracy: 0.7811


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.746527
 * Position  : 0.639566
 * Claim     : 0.505014
 * Counterclaim: 0.415628
 * Rebuttal  : 0.319233
 * Evidence  : 0.635788
 * Concluding Statement: 0.767260
Overall Validation avg F1: 0.5756 val_loss:0.6642 val_accuracy:0.7773
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_2.bin saved
Validation loss increased (0.559783 --> 0.575574).  Saving model ...
------------------------------ Epoch7 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 7/15 | 0500 steps: 0.63005322265625
Epoch 7/15 | 1000 steps: 0.631498291015625
Epoch 7/15 | 1500 steps: 0.6292809244791666
epoch 7 - training loss: 0.6282
epoch 7 - training accuracy: 0.7867


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.757233
 * Position  : 0.652097
 * Claim     : 0.528184
 * Counterclaim: 0.431050
 * Rebuttal  : 0.329218
 * Evidence  : 0.639395
 * Concluding Statement: 0.763258
Overall Validation avg F1: 0.5858 val_loss:0.6598 val_accuracy:0.7816
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_2.bin saved
Validation loss increased (0.575574 --> 0.585776).  Saving model ...
------------------------------ Epoch8 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 8/15 | 0500 steps: 0.6094931640625
Epoch 8/15 | 1000 steps: 0.606763427734375
Epoch 8/15 | 1500 steps: 0.61145458984375
epoch 8 - training loss: 0.6111
epoch 8 - training accuracy: 0.7907


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.763955
 * Position  : 0.645004
 * Claim     : 0.524441
 * Counterclaim: 0.433223
 * Rebuttal  : 0.326764
 * Evidence  : 0.638770
 * Concluding Statement: 0.773828
Overall Validation avg F1: 0.5866 val_loss:0.6484 val_accuracy:0.7852
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_2.bin saved
Validation loss increased (0.585776 --> 0.586569).  Saving model ...
------------------------------ Epoch9 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 9/15 | 0500 steps: 0.59308251953125
Epoch 9/15 | 1000 steps: 0.59516064453125
Epoch 9/15 | 1500 steps: 0.5939339192708334
epoch 9 - training loss: 0.5946
epoch 9 - training accuracy: 0.7957


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.761708
 * Position  : 0.653015
 * Claim     : 0.526916
 * Counterclaim: 0.441703
 * Rebuttal  : 0.336226
 * Evidence  : 0.637040
 * Concluding Statement: 0.760360
Overall Validation avg F1: 0.5881 val_loss:0.6687 val_accuracy:0.7769
Validation loss increased (0.586569 --> 0.588138).  Saving model ...
------------------------------ Epoch10 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 10/15 | 0500 steps: 0.57604736328125
Epoch 10/15 | 1000 steps: 0.584883544921875
Epoch 10/15 | 1500 steps: 0.5823003743489583
epoch 10 - training loss: 0.5816
epoch 10 - training accuracy: 0.7991


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.754416
 * Position  : 0.638086
 * Claim     : 0.528834
 * Counterclaim: 0.432927
 * Rebuttal  : 0.327081
 * Evidence  : 0.644578
 * Concluding Statement: 0.762464
Overall Validation avg F1: 0.5841 val_loss:0.6581 val_accuracy:0.7834
EarlyStopping counter: 1 out of 1
Early Stopping!
Fold2 best f1 score: 0.5881382525954747
------------------------------ Epoch1 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 1/15 | 0500 steps: 1.441619140625
Epoch 1/15 | 1000 steps: 1.21858056640625
Epoch 1/15 | 1500 steps: 1.1064723307291666
epoch 1 - training loss: 1.0967
epoch 1 - training accuracy: 0.6650


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.681577
 * Position  : 0.519701
 * Claim     : 0.306881
 * Counterclaim: 0.270633
 * Rebuttal  : 0.038911
 * Evidence  : 0.575437
 * Concluding Statement: 0.720396
Overall Validation avg F1: 0.4448 val_loss:0.8507 val_accuracy:0.7288
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_3.bin saved
Validation loss increased (-inf --> 0.444791).  Saving model ...
------------------------------ Epoch2 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 2/15 | 0500 steps: 0.8201025390625
Epoch 2/15 | 1000 steps: 0.80411767578125
Epoch 2/15 | 1500 steps: 0.7926728515625
epoch 2 - training loss: 0.7906
epoch 2 - training accuracy: 0.7442


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.675975
 * Position  : 0.572038
 * Claim     : 0.331816
 * Counterclaim: 0.382194
 * Rebuttal  : 0.231348
 * Evidence  : 0.610260
 * Concluding Statement: 0.758985
Overall Validation avg F1: 0.5089 val_loss:0.7710 val_accuracy:0.7483
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_3.bin saved
Validation loss increased (0.444791 --> 0.508945).  Saving model ...
------------------------------ Epoch3 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 3/15 | 0500 steps: 0.73738427734375
Epoch 3/15 | 1000 steps: 0.730103515625
Epoch 3/15 | 1500 steps: 0.7227820638020833
epoch 3 - training loss: 0.7229
epoch 3 - training accuracy: 0.7602


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.730760
 * Position  : 0.612151
 * Claim     : 0.455905
 * Counterclaim: 0.417414
 * Rebuttal  : 0.295044
 * Evidence  : 0.632756
 * Concluding Statement: 0.755745
Overall Validation avg F1: 0.5571 val_loss:0.7038 val_accuracy:0.7651
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_3.bin saved
Validation loss increased (0.508945 --> 0.557111).  Saving model ...
------------------------------ Epoch4 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 4/15 | 0500 steps: 0.6951279296875
Epoch 4/15 | 1000 steps: 0.6852578125
Epoch 4/15 | 1500 steps: 0.6841404622395834
epoch 4 - training loss: 0.6837
epoch 4 - training accuracy: 0.7707


  0%|          | 0/390 [00:00<?, ?it/s]

predict target class and its probabilty


  0%|          | 0/390 [00:00<?, ?it/s]

Validation F1 scores
 * Lead      : 0.743689
 * Position  : 0.618348
 * Claim     : 0.436835
 * Counterclaim: 0.437282
 * Rebuttal  : 0.320973
 * Evidence  : 0.638089
 * Concluding Statement: 0.748393
Overall Validation avg F1: 0.5634 val_loss:0.7021 val_accuracy:0.7644
/content/drive/MyDrive/Kaggle_Feedback-Prize-Evaluating-Student-Writing/model/nb014_v3/longformer_3.bin saved
Validation loss increased (0.557111 --> 0.563373).  Saving model ...
------------------------------ Epoch5 ------------------------------


  0%|          | 0/1560 [00:00<?, ?it/s]

Epoch 5/15 | 0500 steps: 0.66425927734375
Epoch 5/15 | 1000 steps: 0.66082177734375


In [None]:
oof.head()

In [None]:
oof.to_csv(f'{Config.output_dir}/oof_{Config.name}.csv', index=False)

In [None]:
pd.read_csv(f'{Config.output_dir}/oof_{Config.name}.csv').head()

## cv score

In [None]:
if Config.is_debug:
    idlist = alltrain_texts['id'].unique().tolist()
    df_train = df_alltrain.query('id==@idlist')
else:
    df_train = df_alltrain.copy()
print(f'overall cv score: {oof_score(df_train, oof)}')

In [None]:
# Inference