In [1]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install transformers
!pip install datasets
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
!nvidia-smi

Sat May  6 09:48:00 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    45W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:

import os
import gc
import math
import time
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
from tqdm import tqdm
import re
import html

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam, SGD, AdamW, RAdam
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import StratifiedKFold,StratifiedGroupKFold,GroupKFold
from sklearn.metrics import log_loss,f1_score, recall_score, accuracy_score, precision_score

from transformers import AutoModel, AutoConfig, AutoTokenizer, AdamW, DataCollatorWithPadding
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
DIR = "/content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測"
INPUT_DIR = os.path.join(DIR,"input")
OUTPUT_DIR = os.path.join(DIR,"output")
CUSTOM_MODEL_DIR = os.path.join(OUTPUT_DIR,'clrp_facebook_bart_base_epoch10')
OUTPUT_EXP_DIR = DIR + '/output/EXP068/'
if not os.path.exists(OUTPUT_EXP_DIR):
    os.makedirs(OUTPUT_EXP_DIR)

In [6]:

# ====================================================
# CFG
# ====================================================
class CFG:
    debug=False
    apex=True
    print_freq=100
    num_workers=4
    model_name="facebook/bart-base"
    # model='microsoft/deberta-base'
    # model='roberta-base'
    # model='roberta-large'
    # model='roberta-large-mnli'
    # model='xlnet-large-cased'
    # model='albert-xxlarge-v2'
    # model="microsoft/deberta-large"
    # model="microsoft/deberta-v3-large"
    # model='microsoft/deberta-v2-xlarge'
    # model='funnel-transformer/large'
    # model='funnel-transformer/medium'
    # model='albert-base-v2'
    # model='albert-large-v2'
    # model='google/electra-large-discriminator'
    # model='google/electra-base-discriminator'
    # model="facebook/bart-large-mnli"
    # model="facebook/bart-large"
    # model="facebook/bart-base"
    model = CUSTOM_MODEL_DIR
    scheduler='cosine' # ['linear', 'cosine']
    batch_scheduler=True
    num_cycles=0.5
    num_warmup_steps=0
    epochs=5
    encoder_lr=2e-5
    decoder_lr=2e-5
    min_lr=1e-6
    eps=1e-6
    betas=(0.9, 0.999)
    batch_size=16
    fc_dropout=0.2
    target_size=1
    max_len=512
    weight_decay=0.01
    gradient_accumulation_steps=1
    max_grad_norm=1000
    seed=42
    n_fold=10
    trn_fold=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    train=True
    nth_awp_start_epoch=1
    gradient_checkpointing = False
    freezing = False
    num_reinit_layers = 1
    is_reinit_layer = False
    fgm = False
    awp_start=1

if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0, 1]

In [7]:
def get_score(labels, outputs):
    thresh = 0.5
    y_pred = outputs
    y_true = labels
    f_score = f1_score(y_true, (y_pred>thresh).astype(int))
    r_score = recall_score(y_true, (y_pred>thresh).astype(int))
    p_score = precision_score(y_true, (y_pred>thresh).astype(int))
    print(f"f1 score : {f_score}")
    print(f"recall score : {r_score}")
    print(f"precision score : {p_score}")
    return accuracy_score(y_true, (y_pred>thresh).astype(int))

def get_acc_score(labels, outputs):
    y_pred = outputs
    y_true = labels
    best_score = 0
    best_thresh = 0.5
    for thresh in np.arange(0.1, 0.80, 0.01):
        thresh = np.round(thresh, 2)
        score = accuracy_score(y_true, (y_pred>thresh).astype(int))
        #print("Accuracy score at threshold {0} is {1}".format(thresh, score))
        if score > best_score:
          best_score = score
          best_thresh = thresh
    print(f"thresh : {best_thresh}")
    return accuracy_score(y_true, (y_pred>best_thresh).astype(int))


def get_logger(filename=OUTPUT_EXP_DIR+'train'):
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=CFG.seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=CFG.seed)

In [8]:
def freeze(module):
    """
    Freezes module's parameters.
    """
    
    for parameter in module.parameters():
        parameter.requires_grad = False
        
def get_freezed_parameters(module):
    """
    Returns names of freezed parameters of the given module.
    """
    
    freezed_parameters = []
    for name, parameter in module.named_parameters():
        if not parameter.requires_grad:
            freezed_parameters.append(name)
            
    return freezed_parameters

def set_embedding_parameters_bits(embeddings_path, optim_bits=32):
    """
    https://github.com/huggingface/transformers/issues/14819#issuecomment-1003427930
    """
    
    embedding_types = ("word", "position", "token_type")
    for embedding_type in embedding_types:
        attr_name = f"{embedding_type}_embeddings"
        
        if hasattr(embeddings_path, attr_name): 
            bnb.optim.GlobalOptimManager.get_instance().register_module_override(
                getattr(embeddings_path, attr_name), 'weight', {'optim_bits': optim_bits}
            )

In [9]:
import pandas as pd
import numpy as np

train = pd.read_csv(os.path.join(INPUT_DIR,"train_data.csv"))
test = pd.read_csv(os.path.join(INPUT_DIR,"test_data.csv"))
sample_sub = pd.read_csv(os.path.join(INPUT_DIR,"submission.csv"))

train = train.sample(frac=1, random_state=CFG.seed).reset_index()

print(train.shape)
display(train.head(3))

print(test.shape)
display(test.head(3))

print(sample_sub.shape)
display(sample_sub.head(3))

(4974, 7)


Unnamed: 0,index,id,title,year,abstract,keywords,y
0,721,722,Global Optimality Conditions for Deep Neural N...,2018,We study the error landscape of deep linear an...,"deep linear neural networks, global optimality...",1
1,144,145,Multi-Task Learning by Deep Collaboration and ...,2018,Convolutional neural networks (CNN) have becom...,"multi-task learning, soft parameter sharing, f...",0
2,4542,4543,On the Need for Topology-Aware Generative Mode...,2020,"ML algorithms or models, especially deep neura...","Manifold-based Defense, Robust Learning, Adver...",1


(6393, 5)


Unnamed: 0,id,title,year,abstract,keywords
0,1,StyleAlign: Analysis and Applications of Align...,2022,"In this paper, we perform an in-depth study of...","StyleGAN, transfer learning, fine tuning, mode..."
1,2,Embedding a random graph via GNN: mean-field i...,2021,We develop a theory for embedding a random gra...,"Graph neural network, graph embedding, multi-r..."
2,3,BBRefinement: an universal scheme to improve p...,2021,We present a conceptually simple yet powerful ...,"object detection, deep neural networks, refine..."


(6393, 2)


Unnamed: 0,id,y
0,1,0
1,2,0
2,3,0


In [10]:
train["texts"] = train["title"] + "</s>" + train["abstract"] 

In [11]:
skf = StratifiedKFold(n_splits=CFG.n_fold,shuffle=True,random_state=CFG.seed)
for fold, ( _, val_) in enumerate(skf.split(train, train.y)):
    train.loc[val_ , "kfold"] = int(fold)
    
train["kfold"] = train["kfold"].astype(int)

if CFG.debug:
    display(train.groupby('kfold').size())
    train = train.sample(n=500, random_state=0).reset_index(drop=True)
    display(train.groupby('kfold').size())

In [12]:
# ====================================================
# tokenizer
# ====================================================
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(OUTPUT_EXP_DIR+'tokenizer/')
CFG.tokenizer = tokenizer

In [13]:
# ====================================================
# Define max_len
# ====================================================
lengths = []
tk0 = tqdm(train['texts'].fillna("").values, total=len(train))
for text in tk0:
    length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
    lengths.append(length)
CFG.max_len = max(lengths) + 3 # cls + sep + sep
LOGGER.info(f"max_len: {CFG.max_len}")

100%|██████████| 4974/4974 [00:03<00:00, 1497.86it/s]
max_len: 653
INFO:__main__:max_len: 653


In [14]:
class AWP:
    def __init__(self, model, optimizer, *, adv_param='weight',
                 adv_lr=0.001, adv_eps=0.001):
        self.model = model
        self.optimizer = optimizer
        self.adv_param = adv_param
        self.adv_lr = adv_lr
        self.adv_eps = adv_eps
        self.backup = {}

    def perturb(self, inputs, y, criterion):
        """
        Perturb model parameters for AWP gradient
        Call before loss and loss.backward()
        """
        self._save()  # save model parameters
        self._attack_step()  # perturb weights

    def _attack_step(self):
        e = 1e-6
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None and self.adv_param in name:
                grad = self.optimizer.state[param]['exp_avg']
                norm_grad = torch.norm(grad)
                norm_data = torch.norm(param.detach())

                if norm_grad != 0 and not torch.isnan(norm_grad):
                    # Set lower and upper limit in change
                    limit_eps = self.adv_eps * param.detach().abs()
                    param_min = param.data - limit_eps
                    param_max = param.data + limit_eps

                    # Perturb along gradient
                    # w += (adv_lr * |w| / |grad|) * grad
                    param.data.add_(grad, alpha=(self.adv_lr * (norm_data + e) / (norm_grad + e)))

                    # Apply the limit to the change
                    param.data.clamp_(param_min, param_max)

    def _save(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None and self.adv_param in name:
                if name not in self.backup:
                    self.backup[name] = param.clone().detach()
                else:
                    self.backup[name].copy_(param.data)

    def restore(self):
        """
        Restore model parameter to correct position; AWP do not perturbe weights, it perturb gradients
        Call after loss.backward(), before optimizer.step()
        """
        for name, param in self.model.named_parameters():
            if name in self.backup:
                param.data.copy_(self.backup[name])

In [15]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text):
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           return_offsets_mapping=False,
                           truncation=True)
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs


class TrainDataset(Dataset):
    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        self.labels = df['y'].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        inputs = prepare_input(self.cfg, self.inputs[item])
        label = torch.tensor(self.labels[item], dtype=torch.float)
        return inputs, label

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

#collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [16]:
def reinit_layers(model):

    #for layer in model.model.encoder.layer[-CFG.num_reinit_layers:]:
    for layer in model.encoder.layer[-CFG.num_reinit_layers:]:    #Custome model内(backbone)

            for module in layer.modules():

                if isinstance(module,nn.Linear):
                    module.weight.data.normal_(mean=0.0,std=model.config.initializer_range)
                    if module.bias is not None:
                            module.bias.data.zero_()
                elif isinstance(module, nn.Embedding):
                        module.weight.data.normal_(mean=0.0, std=model.config.initializer_range)
                        if module.padding_idx is not None:
                            module.weight.data[module.padding_idx].zero_()
                elif isinstance(module, nn.LayerNorm):
                        module.bias.data.zero_()
                        module.weight.data.fill_(1.0)
                        
    return model

In [17]:
# ====================================================
# Model
# ====================================================
class MeanPooling(nn.Module):
    def __init__(self):
        super(MeanPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, 1)
        sum_mask = input_mask_expanded.sum(1)
        sum_mask = torch.clamp(sum_mask, min=1e-9)
        mean_embeddings = sum_embeddings / sum_mask
        return mean_embeddings

class MaxPooling(nn.Module):
    def __init__(self):
        super(MaxPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        embeddings = last_hidden_state.clone()
        embeddings[input_mask_expanded == 0] = -1e4
        max_embeddings, _ = torch.max(embeddings, dim=1)
        return max_embeddings
    

class CustomModel(nn.Module):
    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
            self.config.hidden_dropout = 0.
            self.config.hidden_dropout_prob = 0.
            self.config.attention_dropout = 0.
            self.config.attention_probs_dropout_prob = 0.
            LOGGER.info(self.config)
        else:
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            self.model = AutoModel(self.config)
        if CFG.is_reinit_layer:
            self.model = reinit_layers(self.model)
            print(f'Reinitializing Last {CFG.num_reinit_layers} Layers.')
        if self.cfg.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        # Freezing
        if cfg.freezing:
            # freezing embeddings and first 2 layers of encoder
            freeze((self.model).embeddings)
            freeze((self.model).encoder.layer[:2])
            cfg.after_freezed_parameters = filter(lambda parameter: parameter.requires_grad, (self.model).parameters())

        self.pool = MaxPooling()
        self.fc = nn.Linear(self.config.hidden_size, cfg.target_size)
        #self._init_weights(self.fc)
        self.layer_norm1 = nn.LayerNorm(self.config.hidden_size)
        
    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        
    def feature(self, inputs):
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        feature = self.pool(last_hidden_states, inputs['attention_mask'])
        return feature

    def forward(self, inputs):
        feature = self.feature(inputs)
        feature = self.layer_norm1(feature)
        output = self.fc(feature)
        return output

In [18]:
def calculate_loss(inputs, labels, model, criterion, is_valid=True, device="cpu"):    
    y_preds = model(inputs)
    loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
    return (loss, y_preds) if is_valid else loss

In [19]:

# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device, awp):
    model.zero_grad()
    model.train()
    awp_start = CFG.awp_start
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = end = time.time()
    global_step = 0
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        if epoch >= awp_start:
            awp.perturb(inputs, labels, criterion)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            loss = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=False, device=device)
        #print(y_preds.sigmoid().squeeze().view(1, -1))
        #loss = criterion(y_preds.sigmoid().squeeze(), labels.squeeze())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        if scaler is not None:
            scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        awp.restore()
        if CFG.fgm:
          fgm.attack() 
          adversarial_loss = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=False, device=device)
          scaler.scale(adversarial_loss).backward()
          fgm.restore()
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            global_step += 1
            if CFG.batch_scheduler:
                scheduler.step()
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  #'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader), 
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          #grad_norm=grad_norm,
                          lr=scheduler.get_lr()[0]))

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    losses = AverageMeter()
    model.eval()
    preds = []
    start = end = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            loss, y_preds = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=True, device=device)
        #loss = criterion(y_preds.sigmoid().squeeze(), labels.squeeze())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    predictions = np.concatenate(preds)
    predictions = np.concatenate(predictions)
    return losses.avg, predictions


def inference_fn(test_loader, model, device):
    preds = []
    model.eval()
    model.to(device)
    tk0 = tqdm(test_loader, total=len(test_loader))
    for inputs in tk0:
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        with torch.no_grad():
            y_preds = model(inputs)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
    predictions = np.concatenate(preds)
    return predictions

In [20]:

# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
    
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['kfold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['kfold'] == fold].reset_index(drop=True)
    valid_labels = valid_folds['y'].values
    
    train_dataset = TrainDataset(CFG, train_folds)
    valid_dataset = TrainDataset(CFG, valid_folds)


    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size*2,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, config_path=None, pretrained=True)
    torch.save(model.config, OUTPUT_EXP_DIR+'config.pth')
    model.to(device)
    
    def get_optimizer_params(model, encoder_lr=5e-6, decoder_lr=1e-4, weight_decay=0.0):
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", 
                    "LayerNorm.weight"]
        group1=['layer.0.','layer.1.','layer.2.','layer.3.']
        group2=['layer.4.','layer.5.','layer.6.','layer.7.']    
        group3=['layer.8.','layer.9.','layer.10.','layer.11.']
        group_all=['layer.0.','layer.1.','layer.2.','layer.3.','layer.4.','layer.5.','layer.6.','layer.7.','layer.8.','layer.9.','layer.10.','layer.11.']
        optimizer_parameters = [
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': weight_decay},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': weight_decay, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': weight_decay, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': weight_decay, 'lr': encoder_lr*2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': 0.0},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': 0.0, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': 0.0, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': 0.0, 'lr': encoder_lr*2.6},
        {'params': [p for n, p in model.named_parameters() if "model" not in n], 'lr':decoder_lr, "momentum" : 0.99},
    ]
        return optimizer_parameters

    optimizer_parameters = get_optimizer_params(model,
                                                encoder_lr=CFG.encoder_lr, 
                                                decoder_lr=CFG.decoder_lr,
                                                weight_decay=CFG.weight_decay)
    optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)
    
    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        if cfg.scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif cfg.scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
            )
        return scheduler
    
    num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss()

    print('Enable AWP')
    awp = AWP(model, optimizer, adv_lr=0.001, adv_eps=0.001)
    #print('Enable FGM')
    #fgm = FGM(model=model, eps=0.1)
    
    best_score = -1.

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device, awp)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)
        
        # scoring
        score_05 = get_score(valid_labels, predictions)
        score = get_acc_score(valid_labels, predictions)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        
        if best_score < score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth")

    predictions = torch.load(OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth", 
                             map_location=torch.device('cpu'))['predictions']
    valid_folds['pred'] = predictions

    torch.cuda.empty_cache()
    gc.collect()
    
    return valid_folds

In [None]:
if __name__ == '__main__':
    
    def get_result(oof_df):
        labels = oof_df['y'].values
        preds = oof_df['pred'].values
        score = get_score(labels, preds)
        acc_score = get_acc_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}')
        LOGGER.info(f'ACC BEST Score: {acc_score:<.4f}')
    
    if CFG.train:
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                oof_df = pd.concat([oof_df, _oof_df])
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
            #break
        oof_df = oof_df.reset_index(drop=True)
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        oof_df.to_pickle(OUTPUT_EXP_DIR+'oof_df.pkl')

BartConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_facebook_bart_base_epoch10",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_dropout": 0.0,
  "hidden_dropout_prob

Enable AWP
Epoch: [1][0/279] Elapsed 0m 3s (remain 17m 24s) Loss: 0.5759(0.5759) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 15s (remain 0m 26s) Loss: 0.4420(0.6493) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 26s (remain 0m 10s) Loss: 0.5238(0.6274) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 35s (remain 0m 0s) Loss: 0.7899(0.6217) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6156(0.6156) 


Epoch 1 - avg_train_loss: 0.6217  avg_val_loss: 0.6143  time: 39s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6217  avg_val_loss: 0.6143  time: 39s
Epoch 1 - Score: 0.7008
INFO:__main__:Epoch 1 - Score: 0.7008
Epoch 1 - Save Best Score: 0.7008 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7008 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7265(0.6143) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
thresh : 0.37
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 19s) Loss: 0.6824(0.6824) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.5759(0.5711) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4417(0.5740) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6554(0.5751) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5318(0.5318) 


Epoch 2 - avg_train_loss: 0.5751  avg_val_loss: 0.5746  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5751  avg_val_loss: 0.5746  time: 36s
Epoch 2 - Score: 0.7088
INFO:__main__:Epoch 2 - Score: 0.7088
Epoch 2 - Save Best Score: 0.7088 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7088 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6875(0.5746) 
f1 score : 0.17977528089887643
recall score : 0.10457516339869281
precision score : 0.64
thresh : 0.51
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.5904(0.5904) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.4543(0.5326) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4557(0.5346) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6681(0.5326) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5135(0.5135) 


Epoch 3 - avg_train_loss: 0.5326  avg_val_loss: 0.5929  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5326  avg_val_loss: 0.5929  time: 36s
Epoch 3 - Score: 0.7108
INFO:__main__:Epoch 3 - Score: 0.7108
Epoch 3 - Save Best Score: 0.7108 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7108 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7104(0.5929) 
f1 score : 0.25742574257425743
recall score : 0.16993464052287582
precision score : 0.5306122448979592
thresh : 0.64
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 30s) Loss: 0.5719(0.5719) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.4730(0.4674) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3485(0.4630) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4907(0.4698) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4689(0.4689) 


Epoch 4 - avg_train_loss: 0.4698  avg_val_loss: 0.5835  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4698  avg_val_loss: 0.5835  time: 36s
Epoch 4 - Score: 0.7108
INFO:__main__:Epoch 4 - Score: 0.7108


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7061(0.5835) 
f1 score : 0.4031620553359684
recall score : 0.3333333333333333
precision score : 0.51
thresh : 0.69
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 28s) Loss: 0.5095(0.5095) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.3977(0.4446) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4969(0.4389) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2488(0.4375) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.4563(0.4563) 


Epoch 5 - avg_train_loss: 0.4375  avg_val_loss: 0.5946  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.4375  avg_val_loss: 0.5946  time: 36s
Epoch 5 - Score: 0.7129
INFO:__main__:Epoch 5 - Score: 0.7129
Epoch 5 - Save Best Score: 0.7129 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7129 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7166(0.5946) 
f1 score : 0.46853146853146854
recall score : 0.43790849673202614
precision score : 0.5037593984962406
thresh : 0.75


Score: 0.6948
INFO:__main__:Score: 0.6948
ACC BEST Score: 0.7129
INFO:__main__:ACC BEST Score: 0.7129
BartConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_facebook_bart_base_epoch10",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "force

f1 score : 0.46853146853146854
recall score : 0.43790849673202614
precision score : 0.5037593984962406
thresh : 0.75


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_facebook_bart_base_epoch10 were not used when initializing BartModel: ['lm_head.weight', 'final_logits_bias']
- This IS expected if you are initializing BartModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.7915(0.7915) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.4517(0.6313) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5952(0.6320) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6038(0.6274) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5558(0.5558) 


Epoch 1 - avg_train_loss: 0.6274  avg_val_loss: 0.6167  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6274  avg_val_loss: 0.6167  time: 36s
Epoch 1 - Score: 0.6948
INFO:__main__:Epoch 1 - Score: 0.6948
Epoch 1 - Save Best Score: 0.6948 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6948 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6101(0.6167) 
f1 score : 0.11428571428571428
recall score : 0.06535947712418301
precision score : 0.45454545454545453
thresh : 0.53
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 30s) Loss: 0.5809(0.5809) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.4518(0.5896) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4962(0.5766) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6182(0.5737) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5338(0.5338) 


Epoch 2 - avg_train_loss: 0.5737  avg_val_loss: 0.6094  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5737  avg_val_loss: 0.6094  time: 36s
Epoch 2 - Score: 0.7229
INFO:__main__:Epoch 2 - Score: 0.7229
Epoch 2 - Save Best Score: 0.7229 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7229 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6166(0.6094) 
f1 score : 0.445859872611465
recall score : 0.45751633986928103
precision score : 0.43478260869565216
thresh : 0.66
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 31s) Loss: 0.6218(0.6218) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.8437(0.5383) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6702(0.5353) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4633(0.5302) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.4142(0.4142) 


Epoch 3 - avg_train_loss: 0.5302  avg_val_loss: 0.5669  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5302  avg_val_loss: 0.5669  time: 36s
Epoch 3 - Score: 0.7189
INFO:__main__:Epoch 3 - Score: 0.7189


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5616(0.5669) 
f1 score : 0.3247863247863248
recall score : 0.24836601307189543
precision score : 0.4691358024691358
thresh : 0.62
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.4161(0.4161) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.7544(0.4871) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4535(0.4763) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3140(0.4687) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4280(0.4280) 


Epoch 4 - avg_train_loss: 0.4687  avg_val_loss: 0.5843  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4687  avg_val_loss: 0.5843  time: 36s
Epoch 4 - Score: 0.7149
INFO:__main__:Epoch 4 - Score: 0.7149


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6011(0.5843) 
f1 score : 0.4113475177304965
recall score : 0.3790849673202614
precision score : 0.4496124031007752
thresh : 0.67
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 43s) Loss: 0.5511(0.5511) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.3403(0.4410) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3585(0.4387) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4531(0.4331) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4498(0.4498) 


Epoch 5 - avg_train_loss: 0.4331  avg_val_loss: 0.5905  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.4331  avg_val_loss: 0.5905  time: 36s
Epoch 5 - Score: 0.7149
INFO:__main__:Epoch 5 - Score: 0.7149


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6223(0.5905) 
f1 score : 0.43365695792880254
recall score : 0.43790849673202614
precision score : 0.42948717948717946
thresh : 0.71


Score: 0.6506
INFO:__main__:Score: 0.6506
ACC BEST Score: 0.7229
INFO:__main__:ACC BEST Score: 0.7229
BartConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_facebook_bart_base_epoch10",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "force

f1 score : 0.445859872611465
recall score : 0.45751633986928103
precision score : 0.43478260869565216
thresh : 0.66


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_facebook_bart_base_epoch10 were not used when initializing BartModel: ['lm_head.weight', 'final_logits_bias']
- This IS expected if you are initializing BartModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 47s) Loss: 0.6712(0.6712) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.5192(0.6286) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.7192(0.6229) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5957(0.6193) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5582(0.5582) 


Epoch 1 - avg_train_loss: 0.6193  avg_val_loss: 0.5819  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6193  avg_val_loss: 0.5819  time: 36s
Epoch 1 - Score: 0.7149
INFO:__main__:Epoch 1 - Score: 0.7149
Epoch 1 - Save Best Score: 0.7149 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7149 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4342(0.5819) 
f1 score : 0.10843373493975904
recall score : 0.058823529411764705
precision score : 0.6923076923076923
thresh : 0.4
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 34s) Loss: 0.4562(0.4562) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.3414(0.5746) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5172(0.5764) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3863(0.5725) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5096(0.5096) 


Epoch 2 - avg_train_loss: 0.5725  avg_val_loss: 0.5707  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5725  avg_val_loss: 0.5707  time: 36s
Epoch 2 - Score: 0.7229
INFO:__main__:Epoch 2 - Score: 0.7229
Epoch 2 - Save Best Score: 0.7229 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7229 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3798(0.5707) 
f1 score : 0.2111111111111111
recall score : 0.12418300653594772
precision score : 0.7037037037037037
thresh : 0.48
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.6323(0.6323) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.3805(0.5176) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.7203(0.5223) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5372(0.5219) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5226(0.5226) 


Epoch 3 - avg_train_loss: 0.5219  avg_val_loss: 0.5682  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5219  avg_val_loss: 0.5682  time: 36s
Epoch 3 - Score: 0.7209
INFO:__main__:Epoch 3 - Score: 0.7209


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3729(0.5682) 
f1 score : 0.35616438356164387
recall score : 0.2549019607843137
precision score : 0.5909090909090909
thresh : 0.56
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.5945(0.5945) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.3535(0.4765) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4270(0.4786) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4301(0.4695) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5244(0.5244) 


Epoch 4 - avg_train_loss: 0.4695  avg_val_loss: 0.5713  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4695  avg_val_loss: 0.5713  time: 36s
Epoch 4 - Score: 0.7229
INFO:__main__:Epoch 4 - Score: 0.7229


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3542(0.5713) 
f1 score : 0.41152263374485604
recall score : 0.32679738562091504
precision score : 0.5555555555555556
thresh : 0.52
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.3298(0.3298) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.4324(0.4273) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4913(0.4321) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3710(0.4372) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5322(0.5322) 


Epoch 5 - avg_train_loss: 0.4372  avg_val_loss: 0.5764  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.4372  avg_val_loss: 0.5764  time: 36s
Epoch 5 - Score: 0.7249
INFO:__main__:Epoch 5 - Score: 0.7249
Epoch 5 - Save Best Score: 0.7249 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7249 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3655(0.5764) 
f1 score : 0.4258555133079847
recall score : 0.3660130718954248
precision score : 0.509090909090909
thresh : 0.57


Score: 0.6968
INFO:__main__:Score: 0.6968
ACC BEST Score: 0.7249
INFO:__main__:ACC BEST Score: 0.7249
BartConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_facebook_bart_base_epoch10",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "force

f1 score : 0.4258555133079847
recall score : 0.3660130718954248
precision score : 0.509090909090909
thresh : 0.57


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_facebook_bart_base_epoch10 were not used when initializing BartModel: ['lm_head.weight', 'final_logits_bias']
- This IS expected if you are initializing BartModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.7416(0.7416) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.5568(0.6296) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6270(0.6148) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6052(0.6181) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6528(0.6528) 


Epoch 1 - avg_train_loss: 0.6181  avg_val_loss: 0.5823  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6181  avg_val_loss: 0.5823  time: 36s
Epoch 1 - Score: 0.7028
INFO:__main__:Epoch 1 - Score: 0.7028
Epoch 1 - Save Best Score: 0.7028 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7028 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4990(0.5823) 
f1 score : 0.025157232704402517
recall score : 0.013157894736842105
precision score : 0.2857142857142857
thresh : 0.39
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.5087(0.5087) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.9324(0.5577) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3758(0.5724) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4832(0.5689) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6305(0.6305) 


Epoch 2 - avg_train_loss: 0.5689  avg_val_loss: 0.5652  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5689  avg_val_loss: 0.5652  time: 36s
Epoch 2 - Score: 0.7108
INFO:__main__:Epoch 2 - Score: 0.7108
Epoch 2 - Save Best Score: 0.7108 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7108 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5274(0.5652) 
f1 score : 0.16
recall score : 0.09210526315789473
precision score : 0.6086956521739131
thresh : 0.4
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 40s) Loss: 0.5006(0.5006) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.6335(0.5256) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2844(0.5139) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4188(0.5175) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6217(0.6217) 


Epoch 3 - avg_train_loss: 0.5175  avg_val_loss: 0.5822  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5175  avg_val_loss: 0.5822  time: 36s
Epoch 3 - Score: 0.7068
INFO:__main__:Epoch 3 - Score: 0.7068


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5745(0.5822) 
f1 score : 0.42968750000000006
recall score : 0.3618421052631579
precision score : 0.5288461538461539
thresh : 0.5
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.5454(0.5454) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.4312(0.4712) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5450(0.4613) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.5914(0.4564) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6342(0.6342) 


Epoch 4 - avg_train_loss: 0.4564  avg_val_loss: 0.6006  time: 37s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4564  avg_val_loss: 0.6006  time: 37s
Epoch 4 - Score: 0.7068
INFO:__main__:Epoch 4 - Score: 0.7068


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5761(0.6006) 
f1 score : 0.4647887323943662
recall score : 0.4342105263157895
precision score : 0.5
thresh : 0.6
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 41s) Loss: 0.3486(0.3486) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.4323(0.4382) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 8s) Loss: 0.3370(0.4246) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4742(0.4150) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6291(0.6291) 


Epoch 5 - avg_train_loss: 0.4150  avg_val_loss: 0.6237  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.4150  avg_val_loss: 0.6237  time: 36s
Epoch 5 - Score: 0.7088
INFO:__main__:Epoch 5 - Score: 0.7088


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6178(0.6237) 
f1 score : 0.46625766871165647
recall score : 0.5
precision score : 0.4367816091954023
thresh : 0.7


Score: 0.7048
INFO:__main__:Score: 0.7048
ACC BEST Score: 0.7108
INFO:__main__:ACC BEST Score: 0.7108
BartConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_facebook_bart_base_epoch10",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "force

f1 score : 0.16
recall score : 0.09210526315789473
precision score : 0.6086956521739131
thresh : 0.4


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_facebook_bart_base_epoch10 were not used when initializing BartModel: ['lm_head.weight', 'final_logits_bias']
- This IS expected if you are initializing BartModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 50s) Loss: 0.7054(0.7054) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.6376(0.6510) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5396(0.6268) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6509(0.6234) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5658(0.5658) 


Epoch 1 - avg_train_loss: 0.6234  avg_val_loss: 0.6020  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6234  avg_val_loss: 0.6020  time: 36s
Epoch 1 - Score: 0.7123
INFO:__main__:Epoch 1 - Score: 0.7123
Epoch 1 - Save Best Score: 0.7123 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7123 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5522(0.6020) 
f1 score : 0.11627906976744186
recall score : 0.06578947368421052
precision score : 0.5
thresh : 0.46
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 38s) Loss: 0.6689(0.6689) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.7130(0.5837) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.7056(0.5748) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6264(0.5716) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5277(0.5277) 


Epoch 2 - avg_train_loss: 0.5716  avg_val_loss: 0.5642  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5716  avg_val_loss: 0.5642  time: 36s
Epoch 2 - Score: 0.7123
INFO:__main__:Epoch 2 - Score: 0.7123


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4184(0.5642) 
f1 score : 0.2087912087912088
recall score : 0.125
precision score : 0.6333333333333333
thresh : 0.43
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 38s) Loss: 0.4573(0.4573) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.4351(0.5268) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 8s) Loss: 0.3690(0.5151) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6021(0.5120) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5453(0.5453) 


Epoch 3 - avg_train_loss: 0.5120  avg_val_loss: 0.5713  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5120  avg_val_loss: 0.5713  time: 36s
Epoch 3 - Score: 0.7183
INFO:__main__:Epoch 3 - Score: 0.7183
Epoch 3 - Save Best Score: 0.7183 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7183 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4186(0.5713) 
f1 score : 0.3870967741935484
recall score : 0.3157894736842105
precision score : 0.5
thresh : 0.62
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 43s) Loss: 0.4289(0.4289) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.5976(0.4697) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3416(0.4578) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4410(0.4530) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6287(0.6287) 


Epoch 4 - avg_train_loss: 0.4530  avg_val_loss: 0.6017  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4530  avg_val_loss: 0.6017  time: 36s
Epoch 4 - Score: 0.7203
INFO:__main__:Epoch 4 - Score: 0.7203
Epoch 4 - Save Best Score: 0.7203 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7203 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4620(0.6017) 
f1 score : 0.49090909090909096
recall score : 0.5328947368421053
precision score : 0.4550561797752809
thresh : 0.7
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 48s) Loss: 0.3894(0.3894) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.3657(0.4096) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2399(0.4117) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3236(0.4093) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6389(0.6389) 


Epoch 5 - avg_train_loss: 0.4093  avg_val_loss: 0.6078  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.4093  avg_val_loss: 0.6078  time: 36s
Epoch 5 - Score: 0.7163
INFO:__main__:Epoch 5 - Score: 0.7163


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4606(0.6078) 
f1 score : 0.4924924924924925
recall score : 0.5394736842105263
precision score : 0.4530386740331492
thresh : 0.71


Score: 0.6620
INFO:__main__:Score: 0.6620
ACC BEST Score: 0.7203
INFO:__main__:ACC BEST Score: 0.7203
BartConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_facebook_bart_base_epoch10",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "force

f1 score : 0.49090909090909096
recall score : 0.5328947368421053
precision score : 0.4550561797752809
thresh : 0.7


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_facebook_bart_base_epoch10 were not used when initializing BartModel: ['lm_head.weight', 'final_logits_bias']
- This IS expected if you are initializing BartModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 48s) Loss: 0.8545(0.8545) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.4032(0.6393) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 8s) Loss: 0.5095(0.6282) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5128(0.6250) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6706(0.6706) 


Epoch 1 - avg_train_loss: 0.6250  avg_val_loss: 0.6049  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6250  avg_val_loss: 0.6049  time: 36s
Epoch 1 - Score: 0.7082
INFO:__main__:Epoch 1 - Score: 0.7082
Epoch 1 - Save Best Score: 0.7082 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7082 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6405(0.6049) 
f1 score : 0.4129554655870445
recall score : 0.3355263157894737
precision score : 0.5368421052631579
thresh : 0.5
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.5392(0.5392) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.5360(0.5670) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6368(0.5606) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.7109(0.5625) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6516(0.6516) 


Epoch 2 - avg_train_loss: 0.5625  avg_val_loss: 0.5913  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5625  avg_val_loss: 0.5913  time: 36s
Epoch 2 - Score: 0.7062
INFO:__main__:Epoch 2 - Score: 0.7062


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5698(0.5913) 
f1 score : 0.3292181069958848
recall score : 0.2631578947368421
precision score : 0.43956043956043955
thresh : 0.54
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 49s) Loss: 0.5096(0.5096) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.9156(0.5321) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3647(0.5204) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4747(0.5181) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6908(0.6908) 


Epoch 3 - avg_train_loss: 0.5181  avg_val_loss: 0.5920  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5181  avg_val_loss: 0.5920  time: 36s
Epoch 3 - Score: 0.7062
INFO:__main__:Epoch 3 - Score: 0.7062


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5982(0.5920) 
f1 score : 0.43727598566308246
recall score : 0.40131578947368424
precision score : 0.48031496062992124
thresh : 0.76
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.3985(0.3985) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.3593(0.4639) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3172(0.4567) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5074(0.4562) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6996(0.6996) 


Epoch 4 - avg_train_loss: 0.4562  avg_val_loss: 0.6039  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4562  avg_val_loss: 0.6039  time: 36s
Epoch 4 - Score: 0.7103
INFO:__main__:Epoch 4 - Score: 0.7103
Epoch 4 - Save Best Score: 0.7103 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7103 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6047(0.6039) 
f1 score : 0.43564356435643564
recall score : 0.4342105263157895
precision score : 0.4370860927152318
thresh : 0.74
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 38s) Loss: 0.4034(0.4034) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.4417(0.4118) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4608(0.4095) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4302(0.4133) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7206(0.7206) 


Epoch 5 - avg_train_loss: 0.4133  avg_val_loss: 0.6234  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.4133  avg_val_loss: 0.6234  time: 36s
Epoch 5 - Score: 0.7103
INFO:__main__:Epoch 5 - Score: 0.7103


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6183(0.6234) 
f1 score : 0.45454545454545453
recall score : 0.4934210526315789
precision score : 0.42134831460674155
thresh : 0.78


Score: 0.6559
INFO:__main__:Score: 0.6559
ACC BEST Score: 0.7103
INFO:__main__:ACC BEST Score: 0.7103
BartConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_facebook_bart_base_epoch10",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "force

f1 score : 0.43564356435643564
recall score : 0.4342105263157895
precision score : 0.4370860927152318
thresh : 0.74


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_facebook_bart_base_epoch10 were not used when initializing BartModel: ['lm_head.weight', 'final_logits_bias']
- This IS expected if you are initializing BartModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.7157(0.7157) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.5724(0.6268) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 8s) Loss: 0.3737(0.6201) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 31s (remain 0m 0s) Loss: 0.5789(0.6198) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5999(0.5999) 


Epoch 1 - avg_train_loss: 0.6198  avg_val_loss: 0.5928  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6198  avg_val_loss: 0.5928  time: 36s
Epoch 1 - Score: 0.7042
INFO:__main__:Epoch 1 - Score: 0.7042
Epoch 1 - Save Best Score: 0.7042 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7042 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5381(0.5928) 
f1 score : 0.03797468354430379
recall score : 0.019736842105263157
precision score : 0.5
thresh : 0.42
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.6481(0.6481) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.5083(0.5813) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6328(0.5738) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3847(0.5770) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5585(0.5585) 


Epoch 2 - avg_train_loss: 0.5770  avg_val_loss: 0.5738  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5770  avg_val_loss: 0.5738  time: 36s
Epoch 2 - Score: 0.7243
INFO:__main__:Epoch 2 - Score: 0.7243
Epoch 2 - Save Best Score: 0.7243 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7243 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5037(0.5738) 
f1 score : 0.17241379310344826
recall score : 0.09868421052631579
precision score : 0.6818181818181818
thresh : 0.45
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 55s) Loss: 0.5121(0.5121) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.4464(0.5215) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6819(0.5148) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6018(0.5201) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4921(0.4921) 


Epoch 3 - avg_train_loss: 0.5201  avg_val_loss: 0.5676  time: 37s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5201  avg_val_loss: 0.5676  time: 37s
Epoch 3 - Score: 0.7203
INFO:__main__:Epoch 3 - Score: 0.7203


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5713(0.5676) 
f1 score : 0.37931034482758624
recall score : 0.2894736842105263
precision score : 0.55
thresh : 0.53
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.3703(0.3703) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.4611(0.4670) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5256(0.4687) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6293(0.4669) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4612(0.4612) 


Epoch 4 - avg_train_loss: 0.4669  avg_val_loss: 0.5655  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4669  avg_val_loss: 0.5655  time: 36s
Epoch 4 - Score: 0.7304
INFO:__main__:Epoch 4 - Score: 0.7304
Epoch 4 - Save Best Score: 0.7304 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7304 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5462(0.5655) 
f1 score : 0.4375
recall score : 0.3684210526315789
precision score : 0.5384615384615384
thresh : 0.55
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.4974(0.4974) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.3717(0.4466) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6134(0.4369) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4366(0.4368) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4559(0.4559) 


Epoch 5 - avg_train_loss: 0.4368  avg_val_loss: 0.5658  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.4368  avg_val_loss: 0.5658  time: 36s
Epoch 5 - Score: 0.7304
INFO:__main__:Epoch 5 - Score: 0.7304


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5458(0.5658) 
f1 score : 0.4375
recall score : 0.3684210526315789
precision score : 0.5384615384615384
thresh : 0.54


Score: 0.7103
INFO:__main__:Score: 0.7103
ACC BEST Score: 0.7304
INFO:__main__:ACC BEST Score: 0.7304
BartConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_facebook_bart_base_epoch10",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "force

f1 score : 0.4375
recall score : 0.3684210526315789
precision score : 0.5384615384615384
thresh : 0.55


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_facebook_bart_base_epoch10 were not used when initializing BartModel: ['lm_head.weight', 'final_logits_bias']
- This IS expected if you are initializing BartModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 52s) Loss: 0.5699(0.5699) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.7675(0.6284) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 8s) Loss: 0.4388(0.6181) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.8125(0.6201) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6641(0.6641) 


Epoch 1 - avg_train_loss: 0.6201  avg_val_loss: 0.5900  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6201  avg_val_loss: 0.5900  time: 36s
Epoch 1 - Score: 0.7062
INFO:__main__:Epoch 1 - Score: 0.7062
Epoch 1 - Save Best Score: 0.7062 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7062 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8122(0.5900) 
f1 score : 0.06329113924050632
recall score : 0.03289473684210526
precision score : 0.8333333333333334
thresh : 0.43
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 38s) Loss: 0.6864(0.6864) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.5872(0.5827) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4850(0.5704) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5267(0.5750) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6745(0.6745) 


Epoch 2 - avg_train_loss: 0.5750  avg_val_loss: 0.5818  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5750  avg_val_loss: 0.5818  time: 36s
Epoch 2 - Score: 0.7143
INFO:__main__:Epoch 2 - Score: 0.7143
Epoch 2 - Save Best Score: 0.7143 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7143 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7249(0.5818) 
f1 score : 0.2745098039215686
recall score : 0.18421052631578946
precision score : 0.5384615384615384
thresh : 0.56
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 41s) Loss: 0.4240(0.4240) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.5635(0.5285) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4344(0.5167) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5387(0.5155) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7093(0.7093) 


Epoch 3 - avg_train_loss: 0.5155  avg_val_loss: 0.5677  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5155  avg_val_loss: 0.5677  time: 36s
Epoch 3 - Score: 0.7143
INFO:__main__:Epoch 3 - Score: 0.7143


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7222(0.5677) 
f1 score : 0.27999999999999997
recall score : 0.18421052631578946
precision score : 0.5833333333333334
thresh : 0.52
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 42s) Loss: 0.3916(0.3916) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.7994(0.4693) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4432(0.4562) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5949(0.4560) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7464(0.7464) 


Epoch 4 - avg_train_loss: 0.4560  avg_val_loss: 0.5649  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4560  avg_val_loss: 0.5649  time: 36s
Epoch 4 - Score: 0.7223
INFO:__main__:Epoch 4 - Score: 0.7223
Epoch 4 - Save Best Score: 0.7223 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7223 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6751(0.5649) 
f1 score : 0.3904382470119522
recall score : 0.3223684210526316
precision score : 0.494949494949495
thresh : 0.58
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 45s) Loss: 0.4469(0.4469) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.4532(0.4259) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3448(0.4203) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4969(0.4225) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.7556(0.7556) 


Epoch 5 - avg_train_loss: 0.4225  avg_val_loss: 0.5671  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.4225  avg_val_loss: 0.5671  time: 36s
Epoch 5 - Score: 0.7183
INFO:__main__:Epoch 5 - Score: 0.7183


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6654(0.5671) 
f1 score : 0.40740740740740744
recall score : 0.3618421052631579
precision score : 0.4661016949152542
thresh : 0.65


Score: 0.6922
INFO:__main__:Score: 0.6922
ACC BEST Score: 0.7223
INFO:__main__:ACC BEST Score: 0.7223
BartConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_facebook_bart_base_epoch10",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "force

f1 score : 0.3904382470119522
recall score : 0.3223684210526316
precision score : 0.494949494949495
thresh : 0.58


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_facebook_bart_base_epoch10 were not used when initializing BartModel: ['lm_head.weight', 'final_logits_bias']
- This IS expected if you are initializing BartModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.5819(0.5819) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.5469(0.6346) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 22s (remain 0m 8s) Loss: 0.7073(0.6275) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 31s (remain 0m 0s) Loss: 0.5883(0.6197) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6376(0.6376) 


Epoch 1 - avg_train_loss: 0.6197  avg_val_loss: 0.6041  time: 35s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6197  avg_val_loss: 0.6041  time: 35s
Epoch 1 - Score: 0.7143
INFO:__main__:Epoch 1 - Score: 0.7143
Epoch 1 - Save Best Score: 0.7143 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7143 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6564(0.6041) 
f1 score : 0.32340425531914896
recall score : 0.25
precision score : 0.4578313253012048
thresh : 0.55
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.5625(0.5625) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.7100(0.5879) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6712(0.5696) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5697(0.5677) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6424(0.6424) 


Epoch 2 - avg_train_loss: 0.5677  avg_val_loss: 0.6071  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5677  avg_val_loss: 0.6071  time: 36s
Epoch 2 - Score: 0.7103
INFO:__main__:Epoch 2 - Score: 0.7103


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6586(0.6071) 
f1 score : 0.453416149068323
recall score : 0.48026315789473684
precision score : 0.4294117647058823
thresh : 0.63
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 35s) Loss: 0.5944(0.5944) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.5422(0.5325) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6654(0.5145) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5141(0.5110) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6228(0.6228) 


Epoch 3 - avg_train_loss: 0.5110  avg_val_loss: 0.5741  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5110  avg_val_loss: 0.5741  time: 36s
Epoch 3 - Score: 0.7183
INFO:__main__:Epoch 3 - Score: 0.7183
Epoch 3 - Save Best Score: 0.7183 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7183 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6665(0.5741) 
f1 score : 0.4761904761904762
recall score : 0.4605263157894737
precision score : 0.49295774647887325
thresh : 0.6
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 51s) Loss: 0.3462(0.3462) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.3953(0.4614) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3728(0.4517) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4357(0.4526) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5883(0.5883) 


Epoch 4 - avg_train_loss: 0.4526  avg_val_loss: 0.5610  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4526  avg_val_loss: 0.5610  time: 36s
Epoch 4 - Score: 0.7163
INFO:__main__:Epoch 4 - Score: 0.7163


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6785(0.5610) 
f1 score : 0.4186046511627907
recall score : 0.35526315789473684
precision score : 0.5094339622641509
thresh : 0.56
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 49s) Loss: 0.3546(0.3546) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.3037(0.4041) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2544(0.4061) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5356(0.4058) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6030(0.6030) 


Epoch 5 - avg_train_loss: 0.4058  avg_val_loss: 0.5833  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.4058  avg_val_loss: 0.5833  time: 36s
Epoch 5 - Score: 0.7163
INFO:__main__:Epoch 5 - Score: 0.7163


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6876(0.5833) 
f1 score : 0.5
recall score : 0.5263157894736842
precision score : 0.47619047619047616
thresh : 0.78


Score: 0.6901
INFO:__main__:Score: 0.6901
ACC BEST Score: 0.7183
INFO:__main__:ACC BEST Score: 0.7183
BartConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_facebook_bart_base_epoch10",
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BartForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classif_dropout": 0.1,
  "classifier_dropout": 0.0,
  "d_model": 768,
  "decoder_attention_heads": 12,
  "decoder_ffn_dim": 3072,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 2,
  "dropout": 0.1,
  "early_stopping": true,
  "encoder_attention_heads": 12,
  "encoder_ffn_dim": 3072,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 2,
  "forced_bos_token_id": 0,
  "force

f1 score : 0.4761904761904762
recall score : 0.4605263157894737
precision score : 0.49295774647887325
thresh : 0.6


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_facebook_bart_base_epoch10 were not used when initializing BartModel: ['lm_head.weight', 'final_logits_bias']
- This IS expected if you are initializing BartModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BartModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 45s) Loss: 0.7455(0.7455) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.5092(0.6572) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 8s) Loss: 0.5803(0.6396) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6623(0.6251) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5707(0.5707) 


Epoch 1 - avg_train_loss: 0.6251  avg_val_loss: 0.5910  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6251  avg_val_loss: 0.5910  time: 36s
Epoch 1 - Score: 0.7062
INFO:__main__:Epoch 1 - Score: 0.7062
Epoch 1 - Save Best Score: 0.7062 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7062 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5438(0.5910) 
f1 score : 0.05095541401273885
recall score : 0.02631578947368421
precision score : 0.8
thresh : 0.39
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 40s) Loss: 0.6783(0.6783) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.7185(0.5668) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5715(0.5735) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5351(0.5719) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6041(0.6041) 


Epoch 2 - avg_train_loss: 0.5719  avg_val_loss: 0.5826  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5719  avg_val_loss: 0.5826  time: 36s
Epoch 2 - Score: 0.7042
INFO:__main__:Epoch 2 - Score: 0.7042


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4859(0.5826) 
f1 score : 0.29107981220657275
recall score : 0.20394736842105263
precision score : 0.5081967213114754
thresh : 0.61
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.4612(0.4612) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.5925(0.5111) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4859(0.5167) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5656(0.5155) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6162(0.6162) 


Epoch 3 - avg_train_loss: 0.5155  avg_val_loss: 0.5817  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5155  avg_val_loss: 0.5817  time: 36s
Epoch 3 - Score: 0.7183
INFO:__main__:Epoch 3 - Score: 0.7183
Epoch 3 - Save Best Score: 0.7183 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7183 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4869(0.5817) 
f1 score : 0.37398373983739835
recall score : 0.3026315789473684
precision score : 0.48936170212765956
thresh : 0.59
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 47s) Loss: 0.3693(0.3693) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.4754(0.4626) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5474(0.4516) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.7146(0.4531) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6396(0.6396) 


Epoch 4 - avg_train_loss: 0.4531  avg_val_loss: 0.6008  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4531  avg_val_loss: 0.6008  time: 36s
Epoch 4 - Score: 0.7123
INFO:__main__:Epoch 4 - Score: 0.7123


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5351(0.6008) 
f1 score : 0.46984126984126984
recall score : 0.4868421052631579
precision score : 0.4539877300613497
thresh : 0.6
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 41s) Loss: 0.5325(0.5325) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.3793(0.4203) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4523(0.4202) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4851(0.4183) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6849(0.6849) 


Epoch 5 - avg_train_loss: 0.4183  avg_val_loss: 0.6267  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.4183  avg_val_loss: 0.6267  time: 36s
Epoch 5 - Score: 0.7123
INFO:__main__:Epoch 5 - Score: 0.7123


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5731(0.6267) 
f1 score : 0.4702380952380953
recall score : 0.5197368421052632
precision score : 0.42934782608695654
thresh : 0.66


In [None]:
from google.colab import runtime
runtime.unassign()