In [1]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install transformers
!pip install datasets
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
!nvidia-smi

Thu May  4 19:55:55 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   31C    P0    44W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:

import os
import gc
import math
import time
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
from tqdm import tqdm
import re
import html

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam, SGD, AdamW, RAdam
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import StratifiedKFold,StratifiedGroupKFold,GroupKFold
from sklearn.metrics import log_loss,f1_score, recall_score, accuracy_score, precision_score

from transformers import AutoModel, AutoConfig, AutoTokenizer, AdamW, DataCollatorWithPadding
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
DIR = "/content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測"
INPUT_DIR = os.path.join(DIR,"input")
OUTPUT_DIR = os.path.join(DIR,"output")
CUSTOM_MODEL_DIR = os.path.join(OUTPUT_DIR,'clrp_roberta_base_epoch10')
OUTPUT_EXP_DIR = DIR + '/output/EXP066/'
if not os.path.exists(OUTPUT_EXP_DIR):
    os.makedirs(OUTPUT_EXP_DIR)

In [6]:

# ====================================================
# CFG
# ====================================================
class CFG:
    debug=False
    apex=True
    print_freq=100
    num_workers=4
    model_name="roberta-base"
    # model='microsoft/deberta-base'
    # model='roberta-base'
    # model='roberta-large'
    # model='roberta-large-mnli'
    # model='xlnet-large-cased'
    # model='albert-xxlarge-v2'
    # model="microsoft/deberta-large"
    # model="microsoft/deberta-v3-large"
    # model='microsoft/deberta-v2-xlarge'
    # model='funnel-transformer/large'
    # model='funnel-transformer/medium'
    # model='albert-base-v2'
    # model='albert-large-v2'
    # model='google/electra-large-discriminator'
    # model='google/electra-base-discriminator'
    # model="facebook/bart-large-mnli"
    # model="facebook/bart-large"
    # model="facebook/bart-base"
    model = CUSTOM_MODEL_DIR
    scheduler='cosine' # ['linear', 'cosine']
    batch_scheduler=True
    num_cycles=0.5
    num_warmup_steps=0
    epochs=5
    encoder_lr=2e-5
    decoder_lr=2e-5
    min_lr=1e-6
    eps=1e-6
    betas=(0.9, 0.999)
    batch_size=16
    fc_dropout=0.2
    target_size=1
    max_len=512
    weight_decay=0.01
    gradient_accumulation_steps=1
    max_grad_norm=1000
    seed=42
    n_fold=10
    trn_fold=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    train=True
    nth_awp_start_epoch=1
    gradient_checkpointing = False
    freezing = False
    num_reinit_layers = 1
    is_reinit_layer = False
    fgm = False
    awp_start=1

if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0, 1]

In [7]:
def get_score(labels, outputs):
    thresh = 0.5
    y_pred = outputs
    y_true = labels
    f_score = f1_score(y_true, (y_pred>thresh).astype(int))
    r_score = recall_score(y_true, (y_pred>thresh).astype(int))
    p_score = precision_score(y_true, (y_pred>thresh).astype(int))
    print(f"f1 score : {f_score}")
    print(f"recall score : {r_score}")
    print(f"precision score : {p_score}")
    return accuracy_score(y_true, (y_pred>thresh).astype(int))

def get_acc_score(labels, outputs):
    y_pred = outputs
    y_true = labels
    best_score = 0
    best_thresh = 0.5
    for thresh in np.arange(0.1, 0.80, 0.01):
        thresh = np.round(thresh, 2)
        score = accuracy_score(y_true, (y_pred>thresh).astype(int))
        #print("Accuracy score at threshold {0} is {1}".format(thresh, score))
        if score > best_score:
          best_score = score
          best_thresh = thresh
    print(f"thresh : {best_thresh}")
    return accuracy_score(y_true, (y_pred>best_thresh).astype(int))


def get_logger(filename=OUTPUT_EXP_DIR+'train'):
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=CFG.seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=CFG.seed)

In [8]:
def freeze(module):
    """
    Freezes module's parameters.
    """
    
    for parameter in module.parameters():
        parameter.requires_grad = False
        
def get_freezed_parameters(module):
    """
    Returns names of freezed parameters of the given module.
    """
    
    freezed_parameters = []
    for name, parameter in module.named_parameters():
        if not parameter.requires_grad:
            freezed_parameters.append(name)
            
    return freezed_parameters

def set_embedding_parameters_bits(embeddings_path, optim_bits=32):
    """
    https://github.com/huggingface/transformers/issues/14819#issuecomment-1003427930
    """
    
    embedding_types = ("word", "position", "token_type")
    for embedding_type in embedding_types:
        attr_name = f"{embedding_type}_embeddings"
        
        if hasattr(embeddings_path, attr_name): 
            bnb.optim.GlobalOptimManager.get_instance().register_module_override(
                getattr(embeddings_path, attr_name), 'weight', {'optim_bits': optim_bits}
            )

In [9]:
import pandas as pd
import numpy as np

train = pd.read_csv(os.path.join(INPUT_DIR,"train_data.csv"))
test = pd.read_csv(os.path.join(INPUT_DIR,"test_data.csv"))
sample_sub = pd.read_csv(os.path.join(INPUT_DIR,"submission.csv"))

train = train.sample(frac=1, random_state=CFG.seed).reset_index()

print(train.shape)
display(train.head(3))

print(test.shape)
display(test.head(3))

print(sample_sub.shape)
display(sample_sub.head(3))

(4974, 7)


Unnamed: 0,index,id,title,year,abstract,keywords,y
0,721,722,Global Optimality Conditions for Deep Neural N...,2018,We study the error landscape of deep linear an...,"deep linear neural networks, global optimality...",1
1,144,145,Multi-Task Learning by Deep Collaboration and ...,2018,Convolutional neural networks (CNN) have becom...,"multi-task learning, soft parameter sharing, f...",0
2,4542,4543,On the Need for Topology-Aware Generative Mode...,2020,"ML algorithms or models, especially deep neura...","Manifold-based Defense, Robust Learning, Adver...",1


(6393, 5)


Unnamed: 0,id,title,year,abstract,keywords
0,1,StyleAlign: Analysis and Applications of Align...,2022,"In this paper, we perform an in-depth study of...","StyleGAN, transfer learning, fine tuning, mode..."
1,2,Embedding a random graph via GNN: mean-field i...,2021,We develop a theory for embedding a random gra...,"Graph neural network, graph embedding, multi-r..."
2,3,BBRefinement: an universal scheme to improve p...,2021,We present a conceptually simple yet powerful ...,"object detection, deep neural networks, refine..."


(6393, 2)


Unnamed: 0,id,y
0,1,0
1,2,0
2,3,0


In [10]:
train["texts"] = train["title"] + "[SEP]" + train["abstract"] 

In [11]:
skf = StratifiedKFold(n_splits=CFG.n_fold,shuffle=True,random_state=CFG.seed)
for fold, ( _, val_) in enumerate(skf.split(train, train.y)):
    train.loc[val_ , "kfold"] = int(fold)
    
train["kfold"] = train["kfold"].astype(int)

if CFG.debug:
    display(train.groupby('kfold').size())
    train = train.sample(n=500, random_state=0).reset_index(drop=True)
    display(train.groupby('kfold').size())

In [12]:
# ====================================================
# tokenizer
# ====================================================
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(OUTPUT_EXP_DIR+'tokenizer/')
CFG.tokenizer = tokenizer

In [13]:
# ====================================================
# Define max_len
# ====================================================
#lengths = []
#tk0 = tqdm(train['texts'].fillna("").values, total=len(train))
#for text in tk0:
#    length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
#    lengths.append(length)
#CFG.max_len = max(lengths) + 3 # cls + sep + sep
#LOGGER.info(f"max_len: {CFG.max_len}")

In [14]:
class AWP:
    def __init__(self, model, optimizer, *, adv_param='weight',
                 adv_lr=0.001, adv_eps=0.001):
        self.model = model
        self.optimizer = optimizer
        self.adv_param = adv_param
        self.adv_lr = adv_lr
        self.adv_eps = adv_eps
        self.backup = {}

    def perturb(self, inputs, y, criterion):
        """
        Perturb model parameters for AWP gradient
        Call before loss and loss.backward()
        """
        self._save()  # save model parameters
        self._attack_step()  # perturb weights

    def _attack_step(self):
        e = 1e-6
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None and self.adv_param in name:
                grad = self.optimizer.state[param]['exp_avg']
                norm_grad = torch.norm(grad)
                norm_data = torch.norm(param.detach())

                if norm_grad != 0 and not torch.isnan(norm_grad):
                    # Set lower and upper limit in change
                    limit_eps = self.adv_eps * param.detach().abs()
                    param_min = param.data - limit_eps
                    param_max = param.data + limit_eps

                    # Perturb along gradient
                    # w += (adv_lr * |w| / |grad|) * grad
                    param.data.add_(grad, alpha=(self.adv_lr * (norm_data + e) / (norm_grad + e)))

                    # Apply the limit to the change
                    param.data.clamp_(param_min, param_max)

    def _save(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None and self.adv_param in name:
                if name not in self.backup:
                    self.backup[name] = param.clone().detach()
                else:
                    self.backup[name].copy_(param.data)

    def restore(self):
        """
        Restore model parameter to correct position; AWP do not perturbe weights, it perturb gradients
        Call after loss.backward(), before optimizer.step()
        """
        for name, param in self.model.named_parameters():
            if name in self.backup:
                param.data.copy_(self.backup[name])

In [15]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text):
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           return_offsets_mapping=False,
                           truncation=True)
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs


class TrainDataset(Dataset):
    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        self.labels = df['y'].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        inputs = prepare_input(self.cfg, self.inputs[item])
        label = torch.tensor(self.labels[item], dtype=torch.float)
        return inputs, label

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

#collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [16]:
def reinit_layers(model):

    #for layer in model.model.encoder.layer[-CFG.num_reinit_layers:]:
    for layer in model.encoder.layer[-CFG.num_reinit_layers:]:    #Custome model内(backbone)

            for module in layer.modules():

                if isinstance(module,nn.Linear):
                    module.weight.data.normal_(mean=0.0,std=model.config.initializer_range)
                    if module.bias is not None:
                            module.bias.data.zero_()
                elif isinstance(module, nn.Embedding):
                        module.weight.data.normal_(mean=0.0, std=model.config.initializer_range)
                        if module.padding_idx is not None:
                            module.weight.data[module.padding_idx].zero_()
                elif isinstance(module, nn.LayerNorm):
                        module.bias.data.zero_()
                        module.weight.data.fill_(1.0)
                        
    return model

In [17]:
# ====================================================
# Model
# ====================================================
class MeanPooling(nn.Module):
    def __init__(self):
        super(MeanPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, 1)
        sum_mask = input_mask_expanded.sum(1)
        sum_mask = torch.clamp(sum_mask, min=1e-9)
        mean_embeddings = sum_embeddings / sum_mask
        return mean_embeddings

class MaxPooling(nn.Module):
    def __init__(self):
        super(MaxPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        embeddings = last_hidden_state.clone()
        embeddings[input_mask_expanded == 0] = -1e4
        max_embeddings, _ = torch.max(embeddings, dim=1)
        return max_embeddings
    

class CustomModel(nn.Module):
    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
            self.config.hidden_dropout = 0.
            self.config.hidden_dropout_prob = 0.
            self.config.attention_dropout = 0.
            self.config.attention_probs_dropout_prob = 0.
            LOGGER.info(self.config)
        else:
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            self.model = AutoModel(self.config)
        if CFG.is_reinit_layer:
            self.model = reinit_layers(self.model)
            print(f'Reinitializing Last {CFG.num_reinit_layers} Layers.')
        if self.cfg.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        # Freezing
        if cfg.freezing:
            # freezing embeddings and first 2 layers of encoder
            freeze((self.model).embeddings)
            freeze((self.model).encoder.layer[:2])
            cfg.after_freezed_parameters = filter(lambda parameter: parameter.requires_grad, (self.model).parameters())

        self.pool = MaxPooling()
        self.fc = nn.Linear(self.config.hidden_size, cfg.target_size)
        self._init_weights(self.fc)
        self.layer_norm1 = nn.LayerNorm(self.config.hidden_size)
        self._init_weights(self.fc)
        
    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        
    def feature(self, inputs):
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        feature = self.pool(last_hidden_states, inputs['attention_mask'])
        return feature

    def forward(self, inputs):
        feature = self.feature(inputs)
        feature = self.layer_norm1(feature)
        output = self.fc(feature)
        return output

In [18]:
def calculate_loss(inputs, labels, model, criterion, is_valid=True, device="cpu"):    
    y_preds = model(inputs)
    loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
    return (loss, y_preds) if is_valid else loss

In [19]:

# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device, awp):
    model.zero_grad()
    model.train()
    awp_start = CFG.awp_start
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = end = time.time()
    global_step = 0
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        if epoch >= awp_start:
            awp.perturb(inputs, labels, criterion)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            loss = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=False, device=device)
        #print(y_preds.sigmoid().squeeze().view(1, -1))
        #loss = criterion(y_preds.sigmoid().squeeze(), labels.squeeze())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        if scaler is not None:
            scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        awp.restore()
        if CFG.fgm:
          fgm.attack() 
          adversarial_loss = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=False, device=device)
          scaler.scale(adversarial_loss).backward()
          fgm.restore()
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            global_step += 1
            if CFG.batch_scheduler:
                scheduler.step()
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  #'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader), 
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          #grad_norm=grad_norm,
                          lr=scheduler.get_lr()[0]))

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    losses = AverageMeter()
    model.eval()
    preds = []
    start = end = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            loss, y_preds = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=True, device=device)
        #loss = criterion(y_preds.sigmoid().squeeze(), labels.squeeze())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    predictions = np.concatenate(preds)
    predictions = np.concatenate(predictions)
    return losses.avg, predictions


def inference_fn(test_loader, model, device):
    preds = []
    model.eval()
    model.to(device)
    tk0 = tqdm(test_loader, total=len(test_loader))
    for inputs in tk0:
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        with torch.no_grad():
            y_preds = model(inputs)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
    predictions = np.concatenate(preds)
    return predictions

In [20]:

# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
    
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['kfold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['kfold'] == fold].reset_index(drop=True)
    valid_labels = valid_folds['y'].values
    
    train_dataset = TrainDataset(CFG, train_folds)
    valid_dataset = TrainDataset(CFG, valid_folds)


    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size*2,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, config_path=None, pretrained=True)
    torch.save(model.config, OUTPUT_EXP_DIR+'config.pth')
    model.to(device)
    
    def get_optimizer_params(model, encoder_lr=5e-6, decoder_lr=1e-4, weight_decay=0.0):
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", 
                    "LayerNorm.weight"]
        group1=['layer.0.','layer.1.','layer.2.','layer.3.']
        group2=['layer.4.','layer.5.','layer.6.','layer.7.']    
        group3=['layer.8.','layer.9.','layer.10.','layer.11.']
        group_all=['layer.0.','layer.1.','layer.2.','layer.3.','layer.4.','layer.5.','layer.6.','layer.7.','layer.8.','layer.9.','layer.10.','layer.11.']
        optimizer_parameters = [
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': weight_decay},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': weight_decay, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': weight_decay, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': weight_decay, 'lr': encoder_lr*2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': 0.0},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': 0.0, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': 0.0, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': 0.0, 'lr': encoder_lr*2.6},
        {'params': [p for n, p in model.named_parameters() if "model" not in n], 'lr':decoder_lr, "momentum" : 0.99},
    ]
        return optimizer_parameters

    optimizer_parameters = get_optimizer_params(model,
                                                encoder_lr=CFG.encoder_lr, 
                                                decoder_lr=CFG.decoder_lr,
                                                weight_decay=CFG.weight_decay)
    optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)
    
    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        if cfg.scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif cfg.scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
            )
        return scheduler
    
    num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss()

    print('Enable AWP')
    awp = AWP(model, optimizer, adv_lr=0.001, adv_eps=0.001)
    #print('Enable FGM')
    #fgm = FGM(model=model, eps=0.1)
    
    best_score = -1.

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device, awp)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)
        
        # scoring
        score_05 = get_score(valid_labels, predictions)
        score = get_acc_score(valid_labels, predictions)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        
        if best_score < score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth")

    predictions = torch.load(OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth", 
                             map_location=torch.device('cpu'))['predictions']
    valid_folds['pred'] = predictions

    torch.cuda.empty_cache()
    gc.collect()
    
    return valid_folds

In [None]:
if __name__ == '__main__':
    
    def get_result(oof_df):
        labels = oof_df['y'].values
        preds = oof_df['pred'].values
        score = get_score(labels, preds)
        acc_score = get_acc_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}')
        LOGGER.info(f'ACC BEST Score: {acc_score:<.4f}')
    
    if CFG.train:
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                oof_df = pd.concat([oof_df, _oof_df])
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
            #break
        oof_df = oof_df.reset_index(drop=True)
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        oof_df.to_pickle(OUTPUT_EXP_DIR+'oof_df.pkl')

RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_roberta_base_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

INFO:__main__:RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitio

Enable AWP
Epoch: [1][0/279] Elapsed 0m 1s (remain 6m 54s) Loss: 1.0609(1.0609) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 10s (remain 0m 17s) Loss: 0.6128(0.6346) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 18s (remain 0m 7s) Loss: 0.4776(0.6126) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.6588(0.6069) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6004(0.6004) 


Epoch 1 - avg_train_loss: 0.6069  avg_val_loss: 0.6046  time: 28s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6069  avg_val_loss: 0.6046  time: 28s
Epoch 1 - Score: 0.7028
INFO:__main__:Epoch 1 - Score: 0.7028
Epoch 1 - Save Best Score: 0.7028 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7028 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6224(0.6046) 
f1 score : 0.29864253393665163
recall score : 0.21568627450980393
precision score : 0.4852941176470588
thresh : 0.58
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 22s) Loss: 0.5899(0.5899) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.6082(0.4995) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.4693(0.4847) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.6415(0.4774) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5252(0.5252) 


Epoch 2 - avg_train_loss: 0.4774  avg_val_loss: 0.5650  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.4774  avg_val_loss: 0.5650  time: 27s
Epoch 2 - Score: 0.7028
INFO:__main__:Epoch 2 - Score: 0.7028


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6157(0.5650) 
f1 score : 0.30493273542600896
recall score : 0.2222222222222222
precision score : 0.4857142857142857
thresh : 0.57
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 26s) Loss: 0.3542(0.3542) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.1410(0.2170) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.2636(0.2059) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.1632(0.1924) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5840(0.5840) 


Epoch 3 - avg_train_loss: 0.1924  avg_val_loss: 0.6431  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1924  avg_val_loss: 0.6431  time: 27s
Epoch 3 - Score: 0.7129
INFO:__main__:Epoch 3 - Score: 0.7129
Epoch 3 - Save Best Score: 0.7129 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7129 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.7821(0.6431) 
f1 score : 0.3539823008849558
recall score : 0.26143790849673204
precision score : 0.547945205479452
thresh : 0.61
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 31s) Loss: 0.0423(0.0423) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0253(0.0341) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0232(0.0335) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0273(0.0312) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6019(0.6019) 


Epoch 4 - avg_train_loss: 0.0312  avg_val_loss: 0.7073  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0312  avg_val_loss: 0.7073  time: 27s
Epoch 4 - Score: 0.7169
INFO:__main__:Epoch 4 - Score: 0.7169
Epoch 4 - Save Best Score: 0.7169 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7169 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.8981(0.7073) 
f1 score : 0.41860465116279066
recall score : 0.35294117647058826
precision score : 0.5142857142857142
thresh : 0.64
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 26s) Loss: 0.0149(0.0149) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0173(0.0167) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0165(0.0162) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0165(0.0164) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6510(0.6510) 


Epoch 5 - avg_train_loss: 0.0164  avg_val_loss: 0.7557  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0164  avg_val_loss: 0.7557  time: 27s
Epoch 5 - Score: 0.7169
INFO:__main__:Epoch 5 - Score: 0.7169


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.9694(0.7557) 
f1 score : 0.39669421487603307
recall score : 0.3137254901960784
precision score : 0.5393258426966292
thresh : 0.56


Score: 0.6988
INFO:__main__:Score: 0.6988
ACC BEST Score: 0.7169
INFO:__main__:ACC BEST Score: 0.7169
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_roberta_base_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "voc

f1 score : 0.41860465116279066
recall score : 0.35294117647058826
precision score : 0.5142857142857142
thresh : 0.64


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_roberta_base_epoch10 were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 35s) Loss: 0.7089(0.7089) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5177(0.6098) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 16s (remain 0m 6s) Loss: 0.7151(0.6001) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.7860(0.6024) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5125(0.5125) 


Epoch 1 - avg_train_loss: 0.6024  avg_val_loss: 0.5971  time: 27s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6024  avg_val_loss: 0.5971  time: 27s
Epoch 1 - Score: 0.6948
INFO:__main__:Epoch 1 - Score: 0.6948
Epoch 1 - Save Best Score: 0.6948 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6948 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5429(0.5971) 
f1 score : 0.09523809523809523
recall score : 0.05228758169934641
precision score : 0.5333333333333333
thresh : 0.5
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 25s) Loss: 0.6506(0.6506) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.4719(0.5070) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.4523(0.5232) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.6671(0.5199) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5187(0.5187) 


Epoch 2 - avg_train_loss: 0.5199  avg_val_loss: 0.5891  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5199  avg_val_loss: 0.5891  time: 27s
Epoch 2 - Score: 0.7169
INFO:__main__:Epoch 2 - Score: 0.7169
Epoch 2 - Save Best Score: 0.7169 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7169 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4929(0.5891) 
f1 score : 0.40298507462686567
recall score : 0.35294117647058826
precision score : 0.46956521739130436
thresh : 0.62
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 26s) Loss: 0.3667(0.3667) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 9s (remain 0m 16s) Loss: 0.4583(0.3003) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.1385(0.2774) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.2311(0.2620) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4639(0.4639) 


Epoch 3 - avg_train_loss: 0.2620  avg_val_loss: 0.6785  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2620  avg_val_loss: 0.6785  time: 27s
Epoch 3 - Score: 0.7129
INFO:__main__:Epoch 3 - Score: 0.7129


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4334(0.6785) 
f1 score : 0.3013698630136986
recall score : 0.21568627450980393
precision score : 0.5
thresh : 0.76
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 30s) Loss: 0.0705(0.0705) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0573(0.0611) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0193(0.0554) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0421(0.0517) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5250(0.5250) 


Epoch 4 - avg_train_loss: 0.0517  avg_val_loss: 0.7724  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0517  avg_val_loss: 0.7724  time: 27s
Epoch 4 - Score: 0.7028
INFO:__main__:Epoch 4 - Score: 0.7028


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4578(0.7724) 
f1 score : 0.3794466403162055
recall score : 0.3137254901960784
precision score : 0.48
thresh : 0.76
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 28s) Loss: 0.0122(0.0122) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0213(0.0207) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0167(0.0208) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0102(0.0209) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5405(0.5405) 


Epoch 5 - avg_train_loss: 0.0209  avg_val_loss: 0.8102  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0209  avg_val_loss: 0.8102  time: 27s
Epoch 5 - Score: 0.6968
INFO:__main__:Epoch 5 - Score: 0.6968


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4661(0.8102) 
f1 score : 0.3783783783783784
recall score : 0.3202614379084967
precision score : 0.46226415094339623
thresh : 0.79


Score: 0.6787
INFO:__main__:Score: 0.6787
ACC BEST Score: 0.7169
INFO:__main__:ACC BEST Score: 0.7169
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_roberta_base_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "voc

f1 score : 0.40298507462686567
recall score : 0.35294117647058826
precision score : 0.46956521739130436
thresh : 0.62


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_roberta_base_epoch10 were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 34s) Loss: 0.7783(0.7783) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5903(0.6287) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.6312(0.6166) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.5630(0.6116) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5653(0.5653) 


Epoch 1 - avg_train_loss: 0.6116  avg_val_loss: 0.5873  time: 27s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6116  avg_val_loss: 0.5873  time: 27s
Epoch 1 - Score: 0.7129
INFO:__main__:Epoch 1 - Score: 0.7129
Epoch 1 - Save Best Score: 0.7129 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7129 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4361(0.5873) 
f1 score : 0.14201183431952663
recall score : 0.0784313725490196
precision score : 0.75
thresh : 0.49
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 30s) Loss: 0.5336(0.5336) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.6171(0.5126) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.3106(0.4980) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.4427(0.4987) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5465(0.5465) 


Epoch 2 - avg_train_loss: 0.4987  avg_val_loss: 0.5707  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.4987  avg_val_loss: 0.5707  time: 27s
Epoch 2 - Score: 0.7249
INFO:__main__:Epoch 2 - Score: 0.7249
Epoch 2 - Save Best Score: 0.7249 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7249 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.3821(0.5707) 
f1 score : 0.25263157894736843
recall score : 0.1568627450980392
precision score : 0.6486486486486487
thresh : 0.46
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 32s) Loss: 0.2640(0.2640) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.1545(0.2792) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.3504(0.2570) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.1046(0.2409) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6649(0.6649) 


Epoch 3 - avg_train_loss: 0.2409  avg_val_loss: 0.6073  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2409  avg_val_loss: 0.6073  time: 27s
Epoch 3 - Score: 0.7189
INFO:__main__:Epoch 3 - Score: 0.7189


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.3742(0.6073) 
f1 score : 0.39669421487603307
recall score : 0.3137254901960784
precision score : 0.5393258426966292
thresh : 0.41
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 35s) Loss: 0.0552(0.0552) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0452(0.0538) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0162(0.0504) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.0803(0.0493) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7558(0.7558) 


Epoch 4 - avg_train_loss: 0.0493  avg_val_loss: 0.6705  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0493  avg_val_loss: 0.6705  time: 27s
Epoch 4 - Score: 0.7169
INFO:__main__:Epoch 4 - Score: 0.7169


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.3807(0.6705) 
f1 score : 0.38260869565217387
recall score : 0.2875816993464052
precision score : 0.5714285714285714
thresh : 0.46
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 29s) Loss: 0.0324(0.0324) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0346(0.0244) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0181(0.0243) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0211(0.0240) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.8074(0.8074) 


Epoch 5 - avg_train_loss: 0.0240  avg_val_loss: 0.7043  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0240  avg_val_loss: 0.7043  time: 27s
Epoch 5 - Score: 0.7249
INFO:__main__:Epoch 5 - Score: 0.7249


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.3843(0.7043) 
f1 score : 0.37333333333333335
recall score : 0.27450980392156865
precision score : 0.5833333333333334
thresh : 0.46


Score: 0.7149
INFO:__main__:Score: 0.7149
ACC BEST Score: 0.7249
INFO:__main__:ACC BEST Score: 0.7249
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_roberta_base_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "voc

f1 score : 0.25263157894736843
recall score : 0.1568627450980392
precision score : 0.6486486486486487
thresh : 0.46


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_roberta_base_epoch10 were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 37s) Loss: 0.7679(0.7679) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.7806(0.6284) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.6158(0.6133) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.5552(0.6077) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6825(0.6825) 


Epoch 1 - avg_train_loss: 0.6077  avg_val_loss: 0.6043  time: 27s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6077  avg_val_loss: 0.6043  time: 27s
Epoch 1 - Score: 0.7048
INFO:__main__:Epoch 1 - Score: 0.7048
Epoch 1 - Save Best Score: 0.7048 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7048 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.7694(0.6043) 
f1 score : 0.12571428571428572
recall score : 0.07236842105263158
precision score : 0.4782608695652174
thresh : 0.55
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 30s) Loss: 0.5879(0.5879) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.3496(0.4976) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.4563(0.4947) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.5871(0.4894) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6549(0.6549) 


Epoch 2 - avg_train_loss: 0.4894  avg_val_loss: 0.5852  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.4894  avg_val_loss: 0.5852  time: 27s
Epoch 2 - Score: 0.7108
INFO:__main__:Epoch 2 - Score: 0.7108
Epoch 2 - Save Best Score: 0.7108 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7108 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5790(0.5852) 
f1 score : 0.34545454545454546
recall score : 0.25
precision score : 0.5588235294117647
thresh : 0.5
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 38s) Loss: 0.3312(0.3312) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 9s (remain 0m 16s) Loss: 0.2121(0.2372) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.1523(0.2164) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.2086(0.2066) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7606(0.7606) 


Epoch 3 - avg_train_loss: 0.2066  avg_val_loss: 0.6507  time: 28s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2066  avg_val_loss: 0.6507  time: 28s
Epoch 3 - Score: 0.7189
INFO:__main__:Epoch 3 - Score: 0.7189
Epoch 3 - Save Best Score: 0.7189 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7189 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6223(0.6507) 
f1 score : 0.352
recall score : 0.2894736842105263
precision score : 0.4489795918367347
thresh : 0.77
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 40s) Loss: 0.0697(0.0697) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0254(0.0386) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0117(0.0343) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.0141(0.0330) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.8778(0.8778) 


Epoch 4 - avg_train_loss: 0.0330  avg_val_loss: 0.8022  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0330  avg_val_loss: 0.8022  time: 27s
Epoch 4 - Score: 0.7108
INFO:__main__:Epoch 4 - Score: 0.7108


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6648(0.8022) 
f1 score : 0.38735177865612647
recall score : 0.3223684210526316
precision score : 0.48514851485148514
thresh : 0.7
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 34s) Loss: 0.0124(0.0124) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0172(0.0171) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0114(0.0161) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.0069(0.0157) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.9543(0.9543) 


Epoch 5 - avg_train_loss: 0.0157  avg_val_loss: 0.8631  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0157  avg_val_loss: 0.8631  time: 27s
Epoch 5 - Score: 0.7129
INFO:__main__:Epoch 5 - Score: 0.7129


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.7045(0.8631) 
f1 score : 0.3734439834024896
recall score : 0.29605263157894735
precision score : 0.5056179775280899
thresh : 0.78


Score: 0.6747
INFO:__main__:Score: 0.6747
ACC BEST Score: 0.7189
INFO:__main__:ACC BEST Score: 0.7189
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_roberta_base_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "voc

f1 score : 0.352
recall score : 0.2894736842105263
precision score : 0.4489795918367347
thresh : 0.77


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_roberta_base_epoch10 were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.5827(0.5827) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.6417(0.6011) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.5879(0.6039) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.5859(0.6048) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5607(0.5607) 


Epoch 1 - avg_train_loss: 0.6048  avg_val_loss: 0.5920  time: 27s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6048  avg_val_loss: 0.5920  time: 27s
Epoch 1 - Score: 0.7143
INFO:__main__:Epoch 1 - Score: 0.7143
Epoch 1 - Save Best Score: 0.7143 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7143 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5591(0.5920) 
f1 score : 0.16470588235294117
recall score : 0.09210526315789473
precision score : 0.7777777777777778
thresh : 0.49
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.6579(0.6579) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5716(0.5239) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.4790(0.5115) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.5728(0.5026) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4720(0.4720) 


Epoch 2 - avg_train_loss: 0.5026  avg_val_loss: 0.5884  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5026  avg_val_loss: 0.5884  time: 27s
Epoch 2 - Score: 0.7143
INFO:__main__:Epoch 2 - Score: 0.7143


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4343(0.5884) 
f1 score : 0.17241379310344826
recall score : 0.09868421052631579
precision score : 0.6818181818181818
thresh : 0.45
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 27s) Loss: 0.4009(0.4009) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.2921(0.2567) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.2276(0.2366) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.2377(0.2293) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6617(0.6617) 


Epoch 3 - avg_train_loss: 0.2293  avg_val_loss: 0.6511  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2293  avg_val_loss: 0.6511  time: 27s
Epoch 3 - Score: 0.7022
INFO:__main__:Epoch 3 - Score: 0.7022


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6132(0.6511) 
f1 score : 0.4610591900311526
recall score : 0.4868421052631579
precision score : 0.4378698224852071
thresh : 0.79
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.0597(0.0597) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0188(0.0445) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0236(0.0385) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.0170(0.0368) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7238(0.7238) 


Epoch 4 - avg_train_loss: 0.0368  avg_val_loss: 0.7431  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0368  avg_val_loss: 0.7431  time: 27s
Epoch 4 - Score: 0.7103
INFO:__main__:Epoch 4 - Score: 0.7103


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6163(0.7431) 
f1 score : 0.3643410852713178
recall score : 0.3092105263157895
precision score : 0.44339622641509435
thresh : 0.68
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 37s) Loss: 0.0149(0.0149) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0207(0.0162) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0212(0.0164) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0192(0.0162) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7718(0.7718) 


Epoch 5 - avg_train_loss: 0.0162  avg_val_loss: 0.7768  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0162  avg_val_loss: 0.7768  time: 27s
Epoch 5 - Score: 0.7183
INFO:__main__:Epoch 5 - Score: 0.7183
Epoch 5 - Save Best Score: 0.7183 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7183 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6353(0.7768) 
f1 score : 0.37007874015748027
recall score : 0.3092105263157895
precision score : 0.46078431372549017
thresh : 0.7


Score: 0.6781
INFO:__main__:Score: 0.6781
ACC BEST Score: 0.7183
INFO:__main__:ACC BEST Score: 0.7183
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_roberta_base_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "voc

f1 score : 0.37007874015748027
recall score : 0.3092105263157895
precision score : 0.46078431372549017
thresh : 0.7


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_roberta_base_epoch10 were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 40s) Loss: 0.6682(0.6682) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.6599(0.6275) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.3541(0.6122) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.4795(0.6108) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6777(0.6777) 


Epoch 1 - avg_train_loss: 0.6108  avg_val_loss: 0.5996  time: 27s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6108  avg_val_loss: 0.5996  time: 27s
Epoch 1 - Score: 0.7062
INFO:__main__:Epoch 1 - Score: 0.7062
Epoch 1 - Save Best Score: 0.7062 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7062 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.7665(0.5996) 
f1 score : 0.08588957055214723
recall score : 0.046052631578947366
precision score : 0.6363636363636364
thresh : 0.61
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 31s) Loss: 0.5951(0.5951) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.4912(0.4826) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.5214(0.4809) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.4688(0.4848) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6669(0.6669) 


Epoch 2 - avg_train_loss: 0.4848  avg_val_loss: 0.5861  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.4848  avg_val_loss: 0.5861  time: 27s
Epoch 2 - Score: 0.7062
INFO:__main__:Epoch 2 - Score: 0.7062


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6453(0.5861) 
f1 score : 0.2745098039215686
recall score : 0.18421052631578946
precision score : 0.5384615384615384
thresh : 0.68
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 28s) Loss: 0.3805(0.3805) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.1540(0.2332) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.1850(0.2188) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.1385(0.2092) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.8679(0.8679) 


Epoch 3 - avg_train_loss: 0.2092  avg_val_loss: 0.6693  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2092  avg_val_loss: 0.6693  time: 27s
Epoch 3 - Score: 0.6801
INFO:__main__:Epoch 3 - Score: 0.6801


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.7624(0.6693) 
f1 score : 0.45962732919254656
recall score : 0.4868421052631579
precision score : 0.43529411764705883
thresh : 0.79
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 31s) Loss: 0.0811(0.0811) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0232(0.0403) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0197(0.0356) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.0258(0.0342) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 1.0471(1.0471) 


Epoch 4 - avg_train_loss: 0.0342  avg_val_loss: 0.7479  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0342  avg_val_loss: 0.7479  time: 27s
Epoch 4 - Score: 0.6962
INFO:__main__:Epoch 4 - Score: 0.6962


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 1.0504(0.7479) 
f1 score : 0.3374485596707819
recall score : 0.26973684210526316
precision score : 0.45054945054945056
thresh : 0.76
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 34s) Loss: 0.0141(0.0141) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0138(0.0163) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0100(0.0164) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.0168(0.0159) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 1.1113(1.1113) 


Epoch 5 - avg_train_loss: 0.0159  avg_val_loss: 0.7815  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0159  avg_val_loss: 0.7815  time: 27s
Epoch 5 - Score: 0.6962
INFO:__main__:Epoch 5 - Score: 0.6962


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1239(0.7815) 
f1 score : 0.3481781376518219
recall score : 0.28289473684210525
precision score : 0.45263157894736844
thresh : 0.78


Score: 0.7002
INFO:__main__:Score: 0.7002
ACC BEST Score: 0.7062
INFO:__main__:ACC BEST Score: 0.7062
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_roberta_base_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "voc

f1 score : 0.08588957055214723
recall score : 0.046052631578947366
precision score : 0.6363636363636364
thresh : 0.61


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_roberta_base_epoch10 were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 40s) Loss: 0.4817(0.4817) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.4890(0.6160) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.5003(0.6166) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.5042(0.6030) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5615(0.5615) 


Epoch 1 - avg_train_loss: 0.6030  avg_val_loss: 0.5782  time: 27s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6030  avg_val_loss: 0.5782  time: 27s
Epoch 1 - Score: 0.7243
INFO:__main__:Epoch 1 - Score: 0.7243
Epoch 1 - Save Best Score: 0.7243 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7243 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5270(0.5782) 
f1 score : 0.3285024154589372
recall score : 0.2236842105263158
precision score : 0.6181818181818182
thresh : 0.54
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 28s) Loss: 0.4476(0.4476) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.4854(0.4950) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.3128(0.4940) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.6971(0.4913) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6224(0.6224) 


Epoch 2 - avg_train_loss: 0.4913  avg_val_loss: 0.5913  time: 28s
INFO:__main__:Epoch 2 - avg_train_loss: 0.4913  avg_val_loss: 0.5913  time: 28s
Epoch 2 - Score: 0.7223
INFO:__main__:Epoch 2 - Score: 0.7223


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4686(0.5913) 
f1 score : 0.2222222222222222
recall score : 0.13157894736842105
precision score : 0.7142857142857143
thresh : 0.36
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 29s) Loss: 0.3198(0.3198) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 9s (remain 0m 16s) Loss: 0.1619(0.2532) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.1801(0.2284) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.2146(0.2195) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6987(0.6987) 


Epoch 3 - avg_train_loss: 0.2195  avg_val_loss: 0.6652  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2195  avg_val_loss: 0.6652  time: 27s
Epoch 3 - Score: 0.7183
INFO:__main__:Epoch 3 - Score: 0.7183


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4588(0.6652) 
f1 score : 0.354978354978355
recall score : 0.26973684210526316
precision score : 0.5189873417721519
thresh : 0.61
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 38s) Loss: 0.0464(0.0464) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0309(0.0461) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0310(0.0404) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.0193(0.0390) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7791(0.7791) 


Epoch 4 - avg_train_loss: 0.0390  avg_val_loss: 0.8022  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0390  avg_val_loss: 0.8022  time: 27s
Epoch 4 - Score: 0.7123
INFO:__main__:Epoch 4 - Score: 0.7123


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5021(0.8022) 
f1 score : 0.3614457831325301
recall score : 0.29605263157894735
precision score : 0.4639175257731959
thresh : 0.75
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 35s) Loss: 0.0163(0.0163) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 9s (remain 0m 15s) Loss: 0.0183(0.0182) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0194(0.0180) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0112(0.0176) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.8766(0.8766) 


Epoch 5 - avg_train_loss: 0.0176  avg_val_loss: 0.8727  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0176  avg_val_loss: 0.8727  time: 27s
Epoch 5 - Score: 0.7143
INFO:__main__:Epoch 5 - Score: 0.7143


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5024(0.8727) 
f1 score : 0.3489361702127659
recall score : 0.26973684210526316
precision score : 0.4939759036144578
thresh : 0.73


Score: 0.7203
INFO:__main__:Score: 0.7203
ACC BEST Score: 0.7243
INFO:__main__:ACC BEST Score: 0.7243
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_roberta_base_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "voc

f1 score : 0.3285024154589372
recall score : 0.2236842105263158
precision score : 0.6181818181818182
thresh : 0.54


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_roberta_base_epoch10 were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 31s) Loss: 0.6472(0.6472) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5568(0.6084) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.6266(0.6058) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.6379(0.6002) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6532(0.6532) 


Epoch 1 - avg_train_loss: 0.6002  avg_val_loss: 0.5789  time: 27s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6002  avg_val_loss: 0.5789  time: 27s
Epoch 1 - Score: 0.7082
INFO:__main__:Epoch 1 - Score: 0.7082
Epoch 1 - Save Best Score: 0.7082 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7082 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7127(0.5789) 
f1 score : 0.11834319526627217
recall score : 0.06578947368421052
precision score : 0.5882352941176471
thresh : 0.6
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 28s) Loss: 0.6361(0.6361) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 9s (remain 0m 16s) Loss: 0.5819(0.5038) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.5470(0.5001) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.4031(0.4935) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7097(0.7097) 


Epoch 2 - avg_train_loss: 0.4935  avg_val_loss: 0.5854  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.4935  avg_val_loss: 0.5854  time: 27s
Epoch 2 - Score: 0.7243
INFO:__main__:Epoch 2 - Score: 0.7243
Epoch 2 - Save Best Score: 0.7243 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7243 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7729(0.5854) 
f1 score : 0.15476190476190477
recall score : 0.08552631578947369
precision score : 0.8125
thresh : 0.45
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 34s) Loss: 0.2799(0.2799) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.2529(0.2659) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.3470(0.2439) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.4136(0.2334) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.9003(0.9003) 


Epoch 3 - avg_train_loss: 0.2334  avg_val_loss: 0.6073  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2334  avg_val_loss: 0.6073  time: 27s
Epoch 3 - Score: 0.7183
INFO:__main__:Epoch 3 - Score: 0.7183


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6569(0.6073) 
f1 score : 0.35443037974683544
recall score : 0.27631578947368424
precision score : 0.49411764705882355
thresh : 0.62
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.0565(0.0565) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0317(0.0464) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0306(0.0420) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0229(0.0393) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 1.2284(1.2284) 


Epoch 4 - avg_train_loss: 0.0393  avg_val_loss: 0.7301  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0393  avg_val_loss: 0.7301  time: 27s
Epoch 4 - Score: 0.7243
INFO:__main__:Epoch 4 - Score: 0.7243


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8164(0.7301) 
f1 score : 0.3770491803278689
recall score : 0.3026315789473684
precision score : 0.5
thresh : 0.77
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 31s) Loss: 0.0170(0.0170) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0107(0.0174) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0174(0.0186) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0108(0.0183) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 1.3195(1.3195) 


Epoch 5 - avg_train_loss: 0.0183  avg_val_loss: 0.7685  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0183  avg_val_loss: 0.7685  time: 27s
Epoch 5 - Score: 0.7223
INFO:__main__:Epoch 5 - Score: 0.7223


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8481(0.7685) 
f1 score : 0.3870967741935484
recall score : 0.3157894736842105
precision score : 0.5
thresh : 0.78


Score: 0.7143
INFO:__main__:Score: 0.7143
ACC BEST Score: 0.7243
INFO:__main__:ACC BEST Score: 0.7243
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_roberta_base_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "voc

f1 score : 0.15476190476190477
recall score : 0.08552631578947369
precision score : 0.8125
thresh : 0.45


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_roberta_base_epoch10 were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 42s) Loss: 0.6173(0.6173) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.4000(0.6204) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.6184(0.6041) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.5111(0.6052) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5945(0.5945) 


Epoch 1 - avg_train_loss: 0.6052  avg_val_loss: 0.5815  time: 27s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6052  avg_val_loss: 0.5815  time: 27s
Epoch 1 - Score: 0.7082
INFO:__main__:Epoch 1 - Score: 0.7082
Epoch 1 - Save Best Score: 0.7082 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7082 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6640(0.5815) 
f1 score : 0.11042944785276074
recall score : 0.05921052631578947
precision score : 0.8181818181818182
thresh : 0.49
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 28s) Loss: 0.4281(0.4281) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.4832(0.5030) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.5398(0.5152) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.5382(0.5041) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6181(0.6181) 


Epoch 2 - avg_train_loss: 0.5041  avg_val_loss: 0.5872  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5041  avg_val_loss: 0.5872  time: 27s
Epoch 2 - Score: 0.7223
INFO:__main__:Epoch 2 - Score: 0.7223
Epoch 2 - Save Best Score: 0.7223 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7223 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6084(0.5872) 
f1 score : 0.4218181818181818
recall score : 0.3815789473684211
precision score : 0.4715447154471545
thresh : 0.59
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 31s) Loss: 0.3631(0.3631) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.1586(0.2542) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.1667(0.2280) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.1777(0.2186) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6870(0.6870) 


Epoch 3 - avg_train_loss: 0.2186  avg_val_loss: 0.6492  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2186  avg_val_loss: 0.6492  time: 27s
Epoch 3 - Score: 0.7304
INFO:__main__:Epoch 3 - Score: 0.7304
Epoch 3 - Save Best Score: 0.7304 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7304 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.7533(0.6492) 
f1 score : 0.34977578475336324
recall score : 0.2565789473684211
precision score : 0.5492957746478874
thresh : 0.58
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.0470(0.0470) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 9s (remain 0m 16s) Loss: 0.0320(0.0443) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0282(0.0395) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.0186(0.0371) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7062(0.7062) 


Epoch 4 - avg_train_loss: 0.0371  avg_val_loss: 0.7086  time: 28s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0371  avg_val_loss: 0.7086  time: 28s
Epoch 4 - Score: 0.7264
INFO:__main__:Epoch 4 - Score: 0.7264


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.8069(0.7086) 
f1 score : 0.4129554655870445
recall score : 0.3355263157894737
precision score : 0.5368421052631579
thresh : 0.7
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 35s) Loss: 0.0257(0.0257) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0123(0.0180) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0186(0.0176) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.0139(0.0172) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7571(0.7571) 


Epoch 5 - avg_train_loss: 0.0172  avg_val_loss: 0.7444  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0172  avg_val_loss: 0.7444  time: 27s
Epoch 5 - Score: 0.7284
INFO:__main__:Epoch 5 - Score: 0.7284


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.8730(0.7444) 
f1 score : 0.38655462184873945
recall score : 0.3026315789473684
precision score : 0.5348837209302325
thresh : 0.69


Score: 0.7082
INFO:__main__:Score: 0.7082
ACC BEST Score: 0.7304
INFO:__main__:ACC BEST Score: 0.7304
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_roberta_base_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "voc

f1 score : 0.34977578475336324
recall score : 0.2565789473684211
precision score : 0.5492957746478874
thresh : 0.58


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_roberta_base_epoch10 were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 37s) Loss: 0.6965(0.6965) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.4701(0.6189) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.5288(0.6044) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.6045(0.6041) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5786(0.5786) 


Epoch 1 - avg_train_loss: 0.6041  avg_val_loss: 0.5918  time: 27s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6041  avg_val_loss: 0.5918  time: 27s
Epoch 1 - Score: 0.7143
INFO:__main__:Epoch 1 - Score: 0.7143
Epoch 1 - Save Best Score: 0.7143 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7143 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5150(0.5918) 
f1 score : 0.11904761904761904
recall score : 0.06578947368421052
precision score : 0.625
thresh : 0.47
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 41s) Loss: 0.4824(0.4824) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5100(0.5024) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.4620(0.5000) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.3693(0.4923) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6302(0.6302) 


Epoch 2 - avg_train_loss: 0.4923  avg_val_loss: 0.6306  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.4923  avg_val_loss: 0.6306  time: 27s
Epoch 2 - Score: 0.7143
INFO:__main__:Epoch 2 - Score: 0.7143


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4623(0.6306) 
f1 score : 0.12941176470588237
recall score : 0.07236842105263158
precision score : 0.6111111111111112
thresh : 0.42
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.2081(0.2081) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.1703(0.2400) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.2792(0.2223) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.3107(0.2103) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6911(0.6911) 


Epoch 3 - avg_train_loss: 0.2103  avg_val_loss: 0.6674  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2103  avg_val_loss: 0.6674  time: 27s
Epoch 3 - Score: 0.7082
INFO:__main__:Epoch 3 - Score: 0.7082


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5164(0.6674) 
f1 score : 0.2883720930232558
recall score : 0.20394736842105263
precision score : 0.49206349206349204
thresh : 0.76
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.0657(0.0657) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0553(0.0394) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0501(0.0344) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.0282(0.0326) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.8390(0.8390) 


Epoch 4 - avg_train_loss: 0.0326  avg_val_loss: 0.8172  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0326  avg_val_loss: 0.8172  time: 27s
Epoch 4 - Score: 0.7022
INFO:__main__:Epoch 4 - Score: 0.7022


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5816(0.8172) 
f1 score : 0.33043478260869563
recall score : 0.25
precision score : 0.48717948717948717
thresh : 0.78
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 43s) Loss: 0.0136(0.0136) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 9s (remain 0m 16s) Loss: 0.0230(0.0160) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0103(0.0158) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.0172(0.0155) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.8622(0.8622) 


Epoch 5 - avg_train_loss: 0.0155  avg_val_loss: 0.8354  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0155  avg_val_loss: 0.8354  time: 27s
Epoch 5 - Score: 0.6982
INFO:__main__:Epoch 5 - Score: 0.6982


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5950(0.8354) 
f1 score : 0.3688524590163934
recall score : 0.29605263157894735
precision score : 0.4891304347826087
thresh : 0.77


Score: 0.7022
INFO:__main__:Score: 0.7022
ACC BEST Score: 0.7143
INFO:__main__:ACC BEST Score: 0.7143
Score: 0.6990
INFO:__main__:Score: 0.6990
ACC BEST Score: 0.7103
INFO:__main__:ACC BEST Score: 0.7103


In [None]:
from google.colab import runtime
runtime.unassign()