In [1]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install transformers
!pip install datasets
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
!nvidia-smi

Sat May  6 13:34:59 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    46W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:

import os
import gc
import math
import time
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
from tqdm import tqdm
import re
import html

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam, SGD, AdamW, RAdam
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import StratifiedKFold,StratifiedGroupKFold,GroupKFold
from sklearn.metrics import log_loss,f1_score, recall_score, accuracy_score, precision_score

from transformers import AutoModel, AutoConfig, AutoTokenizer, AdamW, DataCollatorWithPadding
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
DIR = "/content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測"
INPUT_DIR = os.path.join(DIR,"input")
OUTPUT_DIR = os.path.join(DIR,"output")
CUSTOM_MODEL_DIR = os.path.join(OUTPUT_DIR,'clrp_muppet_epoch10')
OUTPUT_EXP_DIR = DIR + '/output/EXP073/'
if not os.path.exists(OUTPUT_EXP_DIR):
    os.makedirs(OUTPUT_EXP_DIR)

In [6]:

# ====================================================
# CFG
# ====================================================
class CFG:
    debug=False
    apex=True
    print_freq=100
    num_workers=4
    model_name="facebook/muppet-roberta-base"
    # model='microsoft/deberta-base'
    # model='roberta-base'
    # model='roberta-large'
    # model='roberta-large-mnli'
    # model='xlnet-large-cased'
    # model='albert-xxlarge-v2'
    # model="microsoft/deberta-large"
    # model="microsoft/deberta-v3-large"
    # model='microsoft/deberta-v2-xlarge'
    # model='funnel-transformer/large'
    # model='funnel-transformer/medium'
    # model='albert-base-v2'
    # model='albert-large-v2'
    # model='google/electra-large-discriminator'
    # model='google/electra-base-discriminator'
    # model="facebook/bart-large-mnli"
    # model="facebook/bart-large"
    # model="facebook/bart-base"
    model = CUSTOM_MODEL_DIR
    scheduler='cosine' # ['linear', 'cosine']
    batch_scheduler=True
    num_cycles=0.5
    num_warmup_steps=0
    epochs=5
    encoder_lr=2e-5
    decoder_lr=2e-5
    min_lr=1e-6
    eps=1e-6
    betas=(0.9, 0.999)
    batch_size=16
    fc_dropout=0.2
    target_size=1
    max_len=512
    weight_decay=0.01
    gradient_accumulation_steps=1
    max_grad_norm=1000
    seed=42
    n_fold=10
    trn_fold=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    train=True
    nth_awp_start_epoch=1
    gradient_checkpointing = False
    freezing = False
    num_reinit_layers = 1
    is_reinit_layer = False
    fgm = False
    awp_start=1

if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0, 1]

In [7]:
def get_score(labels, outputs):
    thresh = 0.5
    y_pred = outputs
    y_true = labels
    f_score = f1_score(y_true, (y_pred>thresh).astype(int))
    r_score = recall_score(y_true, (y_pred>thresh).astype(int))
    p_score = precision_score(y_true, (y_pred>thresh).astype(int))
    print(f"f1 score : {f_score}")
    print(f"recall score : {r_score}")
    print(f"precision score : {p_score}")
    return accuracy_score(y_true, (y_pred>thresh).astype(int))

def get_acc_score(labels, outputs):
    y_pred = outputs
    y_true = labels
    best_score = 0
    best_thresh = 0.5
    for thresh in np.arange(0.1, 0.80, 0.01):
        thresh = np.round(thresh, 2)
        score = accuracy_score(y_true, (y_pred>thresh).astype(int))
        #print("Accuracy score at threshold {0} is {1}".format(thresh, score))
        if score > best_score:
          best_score = score
          best_thresh = thresh
    print(f"thresh : {best_thresh}")
    return accuracy_score(y_true, (y_pred>best_thresh).astype(int))


def get_logger(filename=OUTPUT_EXP_DIR+'train'):
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=CFG.seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=CFG.seed)

In [8]:
def freeze(module):
    """
    Freezes module's parameters.
    """
    
    for parameter in module.parameters():
        parameter.requires_grad = False
        
def get_freezed_parameters(module):
    """
    Returns names of freezed parameters of the given module.
    """
    
    freezed_parameters = []
    for name, parameter in module.named_parameters():
        if not parameter.requires_grad:
            freezed_parameters.append(name)
            
    return freezed_parameters

def set_embedding_parameters_bits(embeddings_path, optim_bits=32):
    """
    https://github.com/huggingface/transformers/issues/14819#issuecomment-1003427930
    """
    
    embedding_types = ("word", "position", "token_type")
    for embedding_type in embedding_types:
        attr_name = f"{embedding_type}_embeddings"
        
        if hasattr(embeddings_path, attr_name): 
            bnb.optim.GlobalOptimManager.get_instance().register_module_override(
                getattr(embeddings_path, attr_name), 'weight', {'optim_bits': optim_bits}
            )

In [9]:
import pandas as pd
import numpy as np

train = pd.read_csv(os.path.join(INPUT_DIR,"train_data.csv"))
test = pd.read_csv(os.path.join(INPUT_DIR,"test_data.csv"))
sample_sub = pd.read_csv(os.path.join(INPUT_DIR,"submission.csv"))

train = train.sample(frac=1, random_state=CFG.seed).reset_index()

print(train.shape)
display(train.head(3))

print(test.shape)
display(test.head(3))

print(sample_sub.shape)
display(sample_sub.head(3))

(4974, 7)


Unnamed: 0,index,id,title,year,abstract,keywords,y
0,721,722,Global Optimality Conditions for Deep Neural N...,2018,We study the error landscape of deep linear an...,"deep linear neural networks, global optimality...",1
1,144,145,Multi-Task Learning by Deep Collaboration and ...,2018,Convolutional neural networks (CNN) have becom...,"multi-task learning, soft parameter sharing, f...",0
2,4542,4543,On the Need for Topology-Aware Generative Mode...,2020,"ML algorithms or models, especially deep neura...","Manifold-based Defense, Robust Learning, Adver...",1


(6393, 5)


Unnamed: 0,id,title,year,abstract,keywords
0,1,StyleAlign: Analysis and Applications of Align...,2022,"In this paper, we perform an in-depth study of...","StyleGAN, transfer learning, fine tuning, mode..."
1,2,Embedding a random graph via GNN: mean-field i...,2021,We develop a theory for embedding a random gra...,"Graph neural network, graph embedding, multi-r..."
2,3,BBRefinement: an universal scheme to improve p...,2021,We present a conceptually simple yet powerful ...,"object detection, deep neural networks, refine..."


(6393, 2)


Unnamed: 0,id,y
0,1,0
1,2,0
2,3,0


In [10]:
train["texts"] = train["title"] + "</s>" + train["abstract"] 

In [11]:
skf = StratifiedKFold(n_splits=CFG.n_fold,shuffle=True,random_state=CFG.seed)
for fold, ( _, val_) in enumerate(skf.split(train, train.y)):
    train.loc[val_ , "kfold"] = int(fold)
    
train["kfold"] = train["kfold"].astype(int)

if CFG.debug:
    display(train.groupby('kfold').size())
    train = train.sample(n=500, random_state=0).reset_index(drop=True)
    display(train.groupby('kfold').size())

In [12]:
# ====================================================
# tokenizer
# ====================================================
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(OUTPUT_EXP_DIR+'tokenizer/')
CFG.tokenizer = tokenizer

In [13]:
# ====================================================
# Define max_len
# ====================================================
lengths = []
#tk0 = tqdm(train['texts'].fillna("").values, total=len(train))
#for text in tk0:
#    length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
#    lengths.append(length)
#CFG.max_len = max(lengths) + 3 # cls + sep + sep
#LOGGER.info(f"max_len: {CFG.max_len}")

In [14]:
class AWP:
    def __init__(self, model, optimizer, *, adv_param='weight',
                 adv_lr=0.001, adv_eps=0.001):
        self.model = model
        self.optimizer = optimizer
        self.adv_param = adv_param
        self.adv_lr = adv_lr
        self.adv_eps = adv_eps
        self.backup = {}

    def perturb(self, inputs, y, criterion):
        """
        Perturb model parameters for AWP gradient
        Call before loss and loss.backward()
        """
        self._save()  # save model parameters
        self._attack_step()  # perturb weights

    def _attack_step(self):
        e = 1e-6
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None and self.adv_param in name:
                grad = self.optimizer.state[param]['exp_avg']
                norm_grad = torch.norm(grad)
                norm_data = torch.norm(param.detach())

                if norm_grad != 0 and not torch.isnan(norm_grad):
                    # Set lower and upper limit in change
                    limit_eps = self.adv_eps * param.detach().abs()
                    param_min = param.data - limit_eps
                    param_max = param.data + limit_eps

                    # Perturb along gradient
                    # w += (adv_lr * |w| / |grad|) * grad
                    param.data.add_(grad, alpha=(self.adv_lr * (norm_data + e) / (norm_grad + e)))

                    # Apply the limit to the change
                    param.data.clamp_(param_min, param_max)

    def _save(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None and self.adv_param in name:
                if name not in self.backup:
                    self.backup[name] = param.clone().detach()
                else:
                    self.backup[name].copy_(param.data)

    def restore(self):
        """
        Restore model parameter to correct position; AWP do not perturbe weights, it perturb gradients
        Call after loss.backward(), before optimizer.step()
        """
        for name, param in self.model.named_parameters():
            if name in self.backup:
                param.data.copy_(self.backup[name])

In [15]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text):
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           return_offsets_mapping=False,
                           truncation=True)
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs


class TrainDataset(Dataset):
    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        self.labels = df['y'].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        inputs = prepare_input(self.cfg, self.inputs[item])
        label = torch.tensor(self.labels[item], dtype=torch.float)
        return inputs, label

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

#collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [16]:
def reinit_layers(model):

    #for layer in model.model.encoder.layer[-CFG.num_reinit_layers:]:
    for layer in model.encoder.layer[-CFG.num_reinit_layers:]:    #Custome model内(backbone)

            for module in layer.modules():

                if isinstance(module,nn.Linear):
                    module.weight.data.normal_(mean=0.0,std=model.config.initializer_range)
                    if module.bias is not None:
                            module.bias.data.zero_()
                elif isinstance(module, nn.Embedding):
                        module.weight.data.normal_(mean=0.0, std=model.config.initializer_range)
                        if module.padding_idx is not None:
                            module.weight.data[module.padding_idx].zero_()
                elif isinstance(module, nn.LayerNorm):
                        module.bias.data.zero_()
                        module.weight.data.fill_(1.0)
                        
    return model

In [17]:
# ====================================================
# Model
# ====================================================
class MeanPooling(nn.Module):
    def __init__(self):
        super(MeanPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, 1)
        sum_mask = input_mask_expanded.sum(1)
        sum_mask = torch.clamp(sum_mask, min=1e-9)
        mean_embeddings = sum_embeddings / sum_mask
        return mean_embeddings

class MaxPooling(nn.Module):
    def __init__(self):
        super(MaxPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        embeddings = last_hidden_state.clone()
        embeddings[input_mask_expanded == 0] = -1e4
        max_embeddings, _ = torch.max(embeddings, dim=1)
        return max_embeddings
    

class CustomModel(nn.Module):
    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
            self.config.hidden_dropout = 0.
            self.config.hidden_dropout_prob = 0.
            self.config.attention_dropout = 0.
            self.config.attention_probs_dropout_prob = 0.
            LOGGER.info(self.config)
        else:
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            self.model = AutoModel(self.config)
        if CFG.is_reinit_layer:
            self.model = reinit_layers(self.model)
            print(f'Reinitializing Last {CFG.num_reinit_layers} Layers.')
        if self.cfg.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        # Freezing
        if cfg.freezing:
            # freezing embeddings and first 2 layers of encoder
            freeze((self.model).embeddings)
            freeze((self.model).encoder.layer[:2])
            cfg.after_freezed_parameters = filter(lambda parameter: parameter.requires_grad, (self.model).parameters())

        self.pool = MaxPooling()
        self.fc = nn.Linear(self.config.hidden_size, cfg.target_size)
        self._init_weights(self.fc)
        self.layer_norm1 = nn.LayerNorm(self.config.hidden_size)
        
    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        
    def feature(self, inputs):
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        feature = self.pool(last_hidden_states, inputs['attention_mask'])
        return feature

    def forward(self, inputs):
        feature = self.feature(inputs)
        feature = self.layer_norm1(feature)
        output = self.fc(feature)
        return output

In [18]:
def calculate_loss(inputs, labels, model, criterion, is_valid=True, device="cpu"):    
    y_preds = model(inputs)
    loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
    return (loss, y_preds) if is_valid else loss

In [19]:

# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device, awp):
    model.zero_grad()
    model.train()
    awp_start = CFG.awp_start
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = end = time.time()
    global_step = 0
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        if epoch >= awp_start:
            awp.perturb(inputs, labels, criterion)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            loss = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=False, device=device)
        #print(y_preds.sigmoid().squeeze().view(1, -1))
        #loss = criterion(y_preds.sigmoid().squeeze(), labels.squeeze())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        if scaler is not None:
            scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        awp.restore()
        if CFG.fgm:
          fgm.attack() 
          adversarial_loss = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=False, device=device)
          scaler.scale(adversarial_loss).backward()
          fgm.restore()
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            global_step += 1
            if CFG.batch_scheduler:
                scheduler.step()
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  #'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader), 
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          #grad_norm=grad_norm,
                          lr=scheduler.get_lr()[0]))

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    losses = AverageMeter()
    model.eval()
    preds = []
    start = end = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            loss, y_preds = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=True, device=device)
        #loss = criterion(y_preds.sigmoid().squeeze(), labels.squeeze())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    predictions = np.concatenate(preds)
    predictions = np.concatenate(predictions)
    return losses.avg, predictions


def inference_fn(test_loader, model, device):
    preds = []
    model.eval()
    model.to(device)
    tk0 = tqdm(test_loader, total=len(test_loader))
    for inputs in tk0:
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        with torch.no_grad():
            y_preds = model(inputs)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
    predictions = np.concatenate(preds)
    return predictions

In [20]:

# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
    
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['kfold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['kfold'] == fold].reset_index(drop=True)
    valid_labels = valid_folds['y'].values
    
    train_dataset = TrainDataset(CFG, train_folds)
    valid_dataset = TrainDataset(CFG, valid_folds)


    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size*2,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, config_path=None, pretrained=True)
    torch.save(model.config, OUTPUT_EXP_DIR+'config.pth')
    model.to(device)
    
    def get_optimizer_params(model, encoder_lr=5e-6, decoder_lr=1e-4, weight_decay=0.0):
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", 
                    "LayerNorm.weight"]
        group1=['layer.0.','layer.1.','layer.2.','layer.3.']
        group2=['layer.4.','layer.5.','layer.6.','layer.7.']    
        group3=['layer.8.','layer.9.','layer.10.','layer.11.']
        group_all=['layer.0.','layer.1.','layer.2.','layer.3.','layer.4.','layer.5.','layer.6.','layer.7.','layer.8.','layer.9.','layer.10.','layer.11.']
        optimizer_parameters = [
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': weight_decay},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': weight_decay, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': weight_decay, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': weight_decay, 'lr': encoder_lr*2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': 0.0},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': 0.0, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': 0.0, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': 0.0, 'lr': encoder_lr*2.6},
        {'params': [p for n, p in model.named_parameters() if "model" not in n], 'lr':decoder_lr, "momentum" : 0.99},
    ]
        return optimizer_parameters

    optimizer_parameters = get_optimizer_params(model,
                                                encoder_lr=CFG.encoder_lr, 
                                                decoder_lr=CFG.decoder_lr,
                                                weight_decay=CFG.weight_decay)
    optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)
    
    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        if cfg.scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif cfg.scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
            )
        return scheduler
    
    num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss()

    print('Enable AWP')
    awp = AWP(model, optimizer, adv_lr=0.001, adv_eps=0.001)
    #print('Enable FGM')
    #fgm = FGM(model=model, eps=0.1)
    
    best_score = -1.

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device, awp)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)
        
        # scoring
        score_05 = get_score(valid_labels, predictions)
        score = get_acc_score(valid_labels, predictions)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        
        if best_score < score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth")

    predictions = torch.load(OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth", 
                             map_location=torch.device('cpu'))['predictions']
    valid_folds['pred'] = predictions

    torch.cuda.empty_cache()
    gc.collect()
    
    return valid_folds

In [21]:
if __name__ == '__main__':
    
    def get_result(oof_df):
        labels = oof_df['y'].values
        preds = oof_df['pred'].values
        score = get_score(labels, preds)
        acc_score = get_acc_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}')
        LOGGER.info(f'ACC BEST Score: {acc_score:<.4f}')
    
    if CFG.train:
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                oof_df = pd.concat([oof_df, _oof_df])
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
            #break
        oof_df = oof_df.reset_index(drop=True)
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        oof_df.to_pickle(OUTPUT_EXP_DIR+'oof_df.pkl')

RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_muppet_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

INFO:__main__:RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/pro

Enable AWP
Epoch: [1][0/279] Elapsed 0m 1s (remain 6m 40s) Loss: 0.6256(0.6256) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.4150(0.6143) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 18s (remain 0m 7s) Loss: 0.6734(0.6107) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.4444(0.5971) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5388(0.5388) 


Epoch 1 - avg_train_loss: 0.5971  avg_val_loss: 0.5987  time: 28s
INFO:__main__:Epoch 1 - avg_train_loss: 0.5971  avg_val_loss: 0.5987  time: 28s
Epoch 1 - Score: 0.7108
INFO:__main__:Epoch 1 - Score: 0.7108
Epoch 1 - Save Best Score: 0.7108 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7108 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.7149(0.5987) 
f1 score : 0.439344262295082
recall score : 0.43790849673202614
precision score : 0.4407894736842105
thresh : 0.58
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 22s) Loss: 0.5385(0.5385) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.3988(0.5073) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.2979(0.4856) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.3824(0.4882) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4985(0.4985) 


Epoch 2 - avg_train_loss: 0.4882  avg_val_loss: 0.5903  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.4882  avg_val_loss: 0.5903  time: 27s
Epoch 2 - Score: 0.7149
INFO:__main__:Epoch 2 - Score: 0.7149
Epoch 2 - Save Best Score: 0.7149 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7149 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.7809(0.5903) 
f1 score : 0.42748091603053434
recall score : 0.3660130718954248
precision score : 0.5137614678899083
thresh : 0.64
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 24s) Loss: 0.2714(0.2714) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.2234(0.2288) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.2250(0.2073) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.1520(0.1936) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4392(0.4392) 


Epoch 3 - avg_train_loss: 0.1936  avg_val_loss: 0.6674  time: 26s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1936  avg_val_loss: 0.6674  time: 26s
Epoch 3 - Score: 0.7189
INFO:__main__:Epoch 3 - Score: 0.7189
Epoch 3 - Save Best Score: 0.7189 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7189 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.9861(0.6674) 
f1 score : 0.3852459016393443
recall score : 0.30718954248366015
precision score : 0.5164835164835165
thresh : 0.65
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 30s) Loss: 0.0391(0.0391) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0208(0.0341) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0163(0.0294) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0205(0.0274) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4907(0.4907) 


Epoch 4 - avg_train_loss: 0.0274  avg_val_loss: 0.7903  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0274  avg_val_loss: 0.7903  time: 27s
Epoch 4 - Score: 0.7209
INFO:__main__:Epoch 4 - Score: 0.7209
Epoch 4 - Save Best Score: 0.7209 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7209 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 1.1839(0.7903) 
f1 score : 0.37656903765690375
recall score : 0.29411764705882354
precision score : 0.5232558139534884
thresh : 0.64
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 29s) Loss: 0.0146(0.0146) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0124(0.0140) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0252(0.0140) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0143(0.0138) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4848(0.4848) 


Epoch 5 - avg_train_loss: 0.0138  avg_val_loss: 0.8016  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0138  avg_val_loss: 0.8016  time: 27s
Epoch 5 - Score: 0.7209
INFO:__main__:Epoch 5 - Score: 0.7209


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 1.2099(0.8016) 
f1 score : 0.39344262295081966
recall score : 0.3137254901960784
precision score : 0.5274725274725275
thresh : 0.68


Score: 0.7008
INFO:__main__:Score: 0.7008
ACC BEST Score: 0.7209
INFO:__main__:ACC BEST Score: 0.7209
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_muppet_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_siz

f1 score : 0.37656903765690375
recall score : 0.29411764705882354
precision score : 0.5232558139534884
thresh : 0.64


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_muppet_epoch10 were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 27s) Loss: 0.7317(0.7317) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5484(0.6095) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.5185(0.6046) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.6562(0.5988) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4832(0.4832) 


Epoch 1 - avg_train_loss: 0.5988  avg_val_loss: 0.5757  time: 26s
INFO:__main__:Epoch 1 - avg_train_loss: 0.5988  avg_val_loss: 0.5757  time: 26s
Epoch 1 - Score: 0.7149
INFO:__main__:Epoch 1 - Score: 0.7149
Epoch 1 - Save Best Score: 0.7149 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7149 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4927(0.5757) 
f1 score : 0.2755102040816327
recall score : 0.17647058823529413
precision score : 0.627906976744186
thresh : 0.5
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 24s) Loss: 0.3416(0.3416) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5123(0.4627) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.3469(0.4534) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.3026(0.4547) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4729(0.4729) 


Epoch 2 - avg_train_loss: 0.4547  avg_val_loss: 0.5875  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.4547  avg_val_loss: 0.5875  time: 27s
Epoch 2 - Score: 0.7048
INFO:__main__:Epoch 2 - Score: 0.7048


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4401(0.5875) 
f1 score : 0.2512562814070352
recall score : 0.16339869281045752
precision score : 0.5434782608695652
thresh : 0.6
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 30s) Loss: 0.2159(0.2159) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.1377(0.1792) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 16s (remain 0m 6s) Loss: 0.0842(0.1595) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.1126(0.1496) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5012(0.5012) 


Epoch 3 - avg_train_loss: 0.1496  avg_val_loss: 0.6733  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1496  avg_val_loss: 0.6733  time: 27s
Epoch 3 - Score: 0.6988
INFO:__main__:Epoch 3 - Score: 0.6988


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5213(0.6733) 
f1 score : 0.3923076923076923
recall score : 0.3333333333333333
precision score : 0.4766355140186916
thresh : 0.78
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 30s) Loss: 0.0380(0.0380) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0117(0.0267) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 16s (remain 0m 6s) Loss: 0.0206(0.0237) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0062(0.0221) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4901(0.4901) 


Epoch 4 - avg_train_loss: 0.0221  avg_val_loss: 0.8128  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0221  avg_val_loss: 0.8128  time: 27s
Epoch 4 - Score: 0.7048
INFO:__main__:Epoch 4 - Score: 0.7048


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5991(0.8128) 
f1 score : 0.39215686274509803
recall score : 0.32679738562091504
precision score : 0.49019607843137253
thresh : 0.76
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 23s) Loss: 0.0092(0.0092) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0134(0.0125) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0115(0.0124) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0117(0.0126) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4917(0.4917) 


Epoch 5 - avg_train_loss: 0.0126  avg_val_loss: 0.8356  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0126  avg_val_loss: 0.8356  time: 27s
Epoch 5 - Score: 0.7028
INFO:__main__:Epoch 5 - Score: 0.7028


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6105(0.8356) 
f1 score : 0.390625
recall score : 0.32679738562091504
precision score : 0.4854368932038835
thresh : 0.76


Score: 0.7149
INFO:__main__:Score: 0.7149
ACC BEST Score: 0.7149
INFO:__main__:ACC BEST Score: 0.7149
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_muppet_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_siz

f1 score : 0.2755102040816327
recall score : 0.17647058823529413
precision score : 0.627906976744186
thresh : 0.5


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_muppet_epoch10 were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 42s) Loss: 0.6902(0.6902) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.9820(0.6063) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.5859(0.6005) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.4633(0.5955) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5765(0.5765) 


Epoch 1 - avg_train_loss: 0.5955  avg_val_loss: 0.5734  time: 27s
INFO:__main__:Epoch 1 - avg_train_loss: 0.5955  avg_val_loss: 0.5734  time: 27s
Epoch 1 - Score: 0.7169
INFO:__main__:Epoch 1 - Score: 0.7169
Epoch 1 - Save Best Score: 0.7169 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7169 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.3797(0.5734) 
f1 score : 0.10778443113772455
recall score : 0.058823529411764705
precision score : 0.6428571428571429
thresh : 0.43
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 30s) Loss: 0.3279(0.3279) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.2724(0.4572) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.3623(0.4538) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.4582(0.4579) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5961(0.5961) 


Epoch 2 - avg_train_loss: 0.4579  avg_val_loss: 0.5727  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.4579  avg_val_loss: 0.5727  time: 27s
Epoch 2 - Score: 0.7149
INFO:__main__:Epoch 2 - Score: 0.7149


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.3088(0.5727) 
f1 score : 0.32710280373831774
recall score : 0.22875816993464052
precision score : 0.5737704918032787
thresh : 0.51
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 31s) Loss: 0.1637(0.1637) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0901(0.1778) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.1392(0.1614) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0684(0.1519) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6272(0.6272) 


Epoch 3 - avg_train_loss: 0.1519  avg_val_loss: 0.6661  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1519  avg_val_loss: 0.6661  time: 27s
Epoch 3 - Score: 0.7189
INFO:__main__:Epoch 3 - Score: 0.7189
Epoch 3 - Save Best Score: 0.7189 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7189 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.1959(0.6661) 
f1 score : 0.29292929292929293
recall score : 0.1895424836601307
precision score : 0.6444444444444445
thresh : 0.5
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 35s) Loss: 0.0341(0.0341) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.3323(0.0302) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0220(0.0256) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0161(0.0234) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7868(0.7868) 


Epoch 4 - avg_train_loss: 0.0234  avg_val_loss: 0.7566  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0234  avg_val_loss: 0.7566  time: 27s
Epoch 4 - Score: 0.7209
INFO:__main__:Epoch 4 - Score: 0.7209
Epoch 4 - Save Best Score: 0.7209 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7209 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.2054(0.7566) 
f1 score : 0.3868312757201646
recall score : 0.30718954248366015
precision score : 0.5222222222222223
thresh : 0.73
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 34s) Loss: 0.0125(0.0125) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0092(0.0129) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0180(0.0126) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.0163(0.0122) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.8309(0.8309) 


Epoch 5 - avg_train_loss: 0.0122  avg_val_loss: 0.7862  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0122  avg_val_loss: 0.7862  time: 27s
Epoch 5 - Score: 0.7209
INFO:__main__:Epoch 5 - Score: 0.7209


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.2087(0.7862) 
f1 score : 0.37656903765690375
recall score : 0.29411764705882354
precision score : 0.5232558139534884
thresh : 0.73


Score: 0.7008
INFO:__main__:Score: 0.7008
ACC BEST Score: 0.7209
INFO:__main__:ACC BEST Score: 0.7209
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_muppet_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_siz

f1 score : 0.3868312757201646
recall score : 0.30718954248366015
precision score : 0.5222222222222223
thresh : 0.73


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_muppet_epoch10 were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 34s) Loss: 0.6332(0.6332) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.6484(0.6290) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 1.0186(0.6188) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.6107(0.6042) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6652(0.6652) 


Epoch 1 - avg_train_loss: 0.6042  avg_val_loss: 0.5837  time: 27s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6042  avg_val_loss: 0.5837  time: 27s
Epoch 1 - Score: 0.7048
INFO:__main__:Epoch 1 - Score: 0.7048
Epoch 1 - Save Best Score: 0.7048 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7048 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6252(0.5837) 
f1 score : 0.06329113924050632
recall score : 0.03289473684210526
precision score : 0.8333333333333334
thresh : 0.35
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.5588(0.5588) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.4223(0.5329) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.5247(0.5152) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.4983(0.5114) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6353(0.6353) 


Epoch 2 - avg_train_loss: 0.5114  avg_val_loss: 0.5835  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5114  avg_val_loss: 0.5835  time: 27s
Epoch 2 - Score: 0.7149
INFO:__main__:Epoch 2 - Score: 0.7149
Epoch 2 - Save Best Score: 0.7149 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7149 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5891(0.5835) 
f1 score : 0.2340425531914894
recall score : 0.14473684210526316
precision score : 0.6111111111111112
thresh : 0.55
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 41s) Loss: 0.2480(0.2480) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.1206(0.2620) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.1474(0.2464) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.1788(0.2383) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7475(0.7475) 


Epoch 3 - avg_train_loss: 0.2383  avg_val_loss: 0.6739  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2383  avg_val_loss: 0.6739  time: 27s
Epoch 3 - Score: 0.7068
INFO:__main__:Epoch 3 - Score: 0.7068


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.7877(0.6739) 
f1 score : 0.3076923076923077
recall score : 0.2236842105263158
precision score : 0.4927536231884058
thresh : 0.68
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.0461(0.0461) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0399(0.0459) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0343(0.0397) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0468(0.0374) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.8464(0.8464) 


Epoch 4 - avg_train_loss: 0.0374  avg_val_loss: 0.7485  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0374  avg_val_loss: 0.7485  time: 27s
Epoch 4 - Score: 0.7068
INFO:__main__:Epoch 4 - Score: 0.7068


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.9047(0.7485) 
f1 score : 0.4189723320158103
recall score : 0.34868421052631576
precision score : 0.5247524752475248
thresh : 0.53
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.0226(0.0226) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0179(0.0168) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0138(0.0159) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0095(0.0159) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.9117(0.9117) 


Epoch 5 - avg_train_loss: 0.0159  avg_val_loss: 0.7998  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0159  avg_val_loss: 0.7998  time: 27s
Epoch 5 - Score: 0.7129
INFO:__main__:Epoch 5 - Score: 0.7129


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.9203(0.7998) 
f1 score : 0.4189723320158103
recall score : 0.34868421052631576
precision score : 0.5247524752475248
thresh : 0.65


Score: 0.7108
INFO:__main__:Score: 0.7108
ACC BEST Score: 0.7149
INFO:__main__:ACC BEST Score: 0.7149
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_muppet_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_siz

f1 score : 0.2340425531914894
recall score : 0.14473684210526316
precision score : 0.6111111111111112
thresh : 0.55


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_muppet_epoch10 were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 41s) Loss: 0.6866(0.6866) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.6144(0.6109) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 16s (remain 0m 6s) Loss: 0.6338(0.6077) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.3766(0.6042) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4922(0.4922) 


Epoch 1 - avg_train_loss: 0.6042  avg_val_loss: 0.5778  time: 26s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6042  avg_val_loss: 0.5778  time: 26s
Epoch 1 - Score: 0.7304
INFO:__main__:Epoch 1 - Score: 0.7304
Epoch 1 - Save Best Score: 0.7304 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7304 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4531(0.5778) 
f1 score : 0.07453416149068323
recall score : 0.039473684210526314
precision score : 0.6666666666666666
thresh : 0.33
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 32s) Loss: 0.7610(0.7610) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5038(0.4909) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.4361(0.4905) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.4725(0.4890) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5462(0.5462) 


Epoch 2 - avg_train_loss: 0.4890  avg_val_loss: 0.5617  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.4890  avg_val_loss: 0.5617  time: 27s
Epoch 2 - Score: 0.7163
INFO:__main__:Epoch 2 - Score: 0.7163


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4506(0.5617) 
f1 score : 0.4461538461538461
recall score : 0.3815789473684211
precision score : 0.5370370370370371
thresh : 0.64
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 35s) Loss: 0.2584(0.2584) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.2170(0.2262) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.1942(0.2057) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.1486(0.1932) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5822(0.5822) 


Epoch 3 - avg_train_loss: 0.1932  avg_val_loss: 0.6313  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1932  avg_val_loss: 0.6313  time: 27s
Epoch 3 - Score: 0.7103
INFO:__main__:Epoch 3 - Score: 0.7103


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.3993(0.6313) 
f1 score : 0.33035714285714285
recall score : 0.24342105263157895
precision score : 0.5138888888888888
thresh : 0.41
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 35s) Loss: 0.0353(0.0353) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0177(0.0365) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 16s (remain 0m 6s) Loss: 0.0219(0.0313) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0156(0.0293) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7130(0.7130) 


Epoch 4 - avg_train_loss: 0.0293  avg_val_loss: 0.7265  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0293  avg_val_loss: 0.7265  time: 27s
Epoch 4 - Score: 0.7103
INFO:__main__:Epoch 4 - Score: 0.7103


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4497(0.7265) 
f1 score : 0.44852941176470584
recall score : 0.40131578947368424
precision score : 0.5083333333333333
thresh : 0.55
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 35s) Loss: 0.0103(0.0103) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0121(0.0143) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0115(0.0137) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0110(0.0136) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7329(0.7329) 


Epoch 5 - avg_train_loss: 0.0136  avg_val_loss: 0.7632  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0136  avg_val_loss: 0.7632  time: 27s
Epoch 5 - Score: 0.7062
INFO:__main__:Epoch 5 - Score: 0.7062


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4343(0.7632) 
f1 score : 0.4202334630350194
recall score : 0.35526315789473684
precision score : 0.5142857142857142
thresh : 0.44


Score: 0.7002
INFO:__main__:Score: 0.7002
ACC BEST Score: 0.7304
INFO:__main__:ACC BEST Score: 0.7304
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_muppet_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_siz

f1 score : 0.07453416149068323
recall score : 0.039473684210526314
precision score : 0.6666666666666666
thresh : 0.33


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_muppet_epoch10 were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.6209(0.6209) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.6748(0.6158) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 16s (remain 0m 6s) Loss: 0.6020(0.6060) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.4961(0.6019) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6361(0.6361) 


Epoch 1 - avg_train_loss: 0.6019  avg_val_loss: 0.5859  time: 26s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6019  avg_val_loss: 0.5859  time: 26s
Epoch 1 - Score: 0.7284
INFO:__main__:Epoch 1 - Score: 0.7284
Epoch 1 - Save Best Score: 0.7284 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7284 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.7174(0.5859) 
f1 score : 0.10975609756097561
recall score : 0.05921052631578947
precision score : 0.75
thresh : 0.44
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 31s) Loss: 0.6408(0.6408) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5125(0.5031) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.3587(0.5013) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.3471(0.5007) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6744(0.6744) 


Epoch 2 - avg_train_loss: 0.5007  avg_val_loss: 0.5793  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5007  avg_val_loss: 0.5793  time: 27s
Epoch 2 - Score: 0.7243
INFO:__main__:Epoch 2 - Score: 0.7243


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.7731(0.5793) 
f1 score : 0.26315789473684215
recall score : 0.16447368421052633
precision score : 0.6578947368421053
thresh : 0.46
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 32s) Loss: 0.3971(0.3971) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.1855(0.2640) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.2499(0.2429) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.1750(0.2345) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.8118(0.8118) 


Epoch 3 - avg_train_loss: 0.2345  avg_val_loss: 0.6415  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2345  avg_val_loss: 0.6415  time: 27s
Epoch 3 - Score: 0.7264
INFO:__main__:Epoch 3 - Score: 0.7264


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6679(0.6415) 
f1 score : 0.4320557491289198
recall score : 0.40789473684210525
precision score : 0.45925925925925926
thresh : 0.66
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.0966(0.0966) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0336(0.0481) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0357(0.0425) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0411(0.0416) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.9009(0.9009) 


Epoch 4 - avg_train_loss: 0.0416  avg_val_loss: 0.7383  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0416  avg_val_loss: 0.7383  time: 27s
Epoch 4 - Score: 0.7223
INFO:__main__:Epoch 4 - Score: 0.7223


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.8424(0.7383) 
f1 score : 0.39183673469387753
recall score : 0.3157894736842105
precision score : 0.5161290322580645
thresh : 0.7
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 34s) Loss: 0.0254(0.0254) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0250(0.0194) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0169(0.0207) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0181(0.0198) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.9417(0.9417) 


Epoch 5 - avg_train_loss: 0.0198  avg_val_loss: 0.7796  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0198  avg_val_loss: 0.7796  time: 27s
Epoch 5 - Score: 0.7243
INFO:__main__:Epoch 5 - Score: 0.7243


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.9432(0.7796) 
f1 score : 0.39506172839506176
recall score : 0.3157894736842105
precision score : 0.5274725274725275
thresh : 0.67


Score: 0.7062
INFO:__main__:Score: 0.7062
ACC BEST Score: 0.7284
INFO:__main__:ACC BEST Score: 0.7284
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_muppet_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_siz

f1 score : 0.10975609756097561
recall score : 0.05921052631578947
precision score : 0.75
thresh : 0.44


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_muppet_epoch10 were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 41s) Loss: 0.8248(0.8248) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5076(0.6215) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 16s (remain 0m 6s) Loss: 0.5058(0.6170) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.5481(0.6102) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5694(0.5694) 


Epoch 1 - avg_train_loss: 0.6102  avg_val_loss: 0.5783  time: 26s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6102  avg_val_loss: 0.5783  time: 26s
Epoch 1 - Score: 0.7223
INFO:__main__:Epoch 1 - Score: 0.7223
Epoch 1 - Save Best Score: 0.7223 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7223 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5242(0.5783) 
f1 score : 0.15116279069767444
recall score : 0.08552631578947369
precision score : 0.65
thresh : 0.44
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 31s) Loss: 0.6405(0.6405) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5315(0.5049) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.4980(0.4989) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.4327(0.5015) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6030(0.6030) 


Epoch 2 - avg_train_loss: 0.5015  avg_val_loss: 0.6056  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5015  avg_val_loss: 0.6056  time: 27s
Epoch 2 - Score: 0.7082
INFO:__main__:Epoch 2 - Score: 0.7082


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4669(0.6056) 
f1 score : 0.10843373493975902
recall score : 0.05921052631578947
precision score : 0.6428571428571429
thresh : 0.55
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 42s) Loss: 0.2573(0.2573) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.1559(0.2407) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.3588(0.2208) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.1571(0.2063) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5673(0.5673) 


Epoch 3 - avg_train_loss: 0.2063  avg_val_loss: 0.6530  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2063  avg_val_loss: 0.6530  time: 27s
Epoch 3 - Score: 0.7143
INFO:__main__:Epoch 3 - Score: 0.7143


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5109(0.6530) 
f1 score : 0.3716814159292035
recall score : 0.27631578947368424
precision score : 0.5675675675675675
thresh : 0.5
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.0480(0.0480) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0444(0.0372) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0162(0.0329) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0307(0.0310) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6616(0.6616) 


Epoch 4 - avg_train_loss: 0.0310  avg_val_loss: 0.7594  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0310  avg_val_loss: 0.7594  time: 27s
Epoch 4 - Score: 0.7183
INFO:__main__:Epoch 4 - Score: 0.7183


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4715(0.7594) 
f1 score : 0.3603603603603604
recall score : 0.2631578947368421
precision score : 0.5714285714285714
thresh : 0.45
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.0145(0.0145) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0143(0.0160) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0156(0.0155) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0142(0.0156) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6841(0.6841) 


Epoch 5 - avg_train_loss: 0.0156  avg_val_loss: 0.7896  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0156  avg_val_loss: 0.7896  time: 27s
Epoch 5 - Score: 0.7203
INFO:__main__:Epoch 5 - Score: 0.7203


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.4755(0.7896) 
f1 score : 0.3660714285714286
recall score : 0.26973684210526316
precision score : 0.5694444444444444
thresh : 0.45


Score: 0.7062
INFO:__main__:Score: 0.7062
ACC BEST Score: 0.7223
INFO:__main__:ACC BEST Score: 0.7223
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_muppet_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_siz

f1 score : 0.15116279069767444
recall score : 0.08552631578947369
precision score : 0.65
thresh : 0.44


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_muppet_epoch10 were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.7209(0.7209) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5572(0.6185) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 16s (remain 0m 6s) Loss: 0.5948(0.5984) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.5565(0.5970) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6600(0.6600) 


Epoch 1 - avg_train_loss: 0.5970  avg_val_loss: 0.5923  time: 26s
INFO:__main__:Epoch 1 - avg_train_loss: 0.5970  avg_val_loss: 0.5923  time: 26s
Epoch 1 - Score: 0.7103
INFO:__main__:Epoch 1 - Score: 0.7103
Epoch 1 - Save Best Score: 0.7103 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7103 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7007(0.5923) 
f1 score : 0.30697674418604654
recall score : 0.21710526315789475
precision score : 0.5238095238095238
thresh : 0.55
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 32s) Loss: 0.5312(0.5312) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.4716(0.4775) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.4723(0.4648) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.7593(0.4669) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7049(0.7049) 


Epoch 2 - avg_train_loss: 0.4669  avg_val_loss: 0.5538  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.4669  avg_val_loss: 0.5538  time: 27s
Epoch 2 - Score: 0.7243
INFO:__main__:Epoch 2 - Score: 0.7243
Epoch 2 - Save Best Score: 0.7243 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7243 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7217(0.5538) 
f1 score : 0.27411167512690354
recall score : 0.17763157894736842
precision score : 0.6
thresh : 0.44
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 37s) Loss: 0.1935(0.1935) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.1374(0.1857) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0788(0.1676) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.4466(0.1588) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.8778(0.8778) 


Epoch 3 - avg_train_loss: 0.1588  avg_val_loss: 0.6109  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1588  avg_val_loss: 0.6109  time: 27s
Epoch 3 - Score: 0.7163
INFO:__main__:Epoch 3 - Score: 0.7163


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.9450(0.6109) 
f1 score : 0.434108527131783
recall score : 0.3684210526315789
precision score : 0.5283018867924528
thresh : 0.78
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 34s) Loss: 0.0439(0.0439) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0224(0.0298) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0137(0.0256) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0131(0.0239) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 1.1333(1.1333) 


Epoch 4 - avg_train_loss: 0.0239  avg_val_loss: 0.7191  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0239  avg_val_loss: 0.7191  time: 27s
Epoch 4 - Score: 0.7143
INFO:__main__:Epoch 4 - Score: 0.7143


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.2343(0.7191) 
f1 score : 0.3801652892561984
recall score : 0.3026315789473684
precision score : 0.5111111111111111
thresh : 0.57
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.0136(0.0136) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0094(0.0129) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0111(0.0120) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0084(0.0117) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 1.1958(1.1958) 


Epoch 5 - avg_train_loss: 0.0117  avg_val_loss: 0.7486  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0117  avg_val_loss: 0.7486  time: 27s
Epoch 5 - Score: 0.7163
INFO:__main__:Epoch 5 - Score: 0.7163


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 1.3140(0.7486) 
f1 score : 0.37815126050420167
recall score : 0.29605263157894735
precision score : 0.5232558139534884
thresh : 0.79


Score: 0.7123
INFO:__main__:Score: 0.7123
ACC BEST Score: 0.7243
INFO:__main__:ACC BEST Score: 0.7243
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_muppet_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_siz

f1 score : 0.27411167512690354
recall score : 0.17763157894736842
precision score : 0.6
thresh : 0.44


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_muppet_epoch10 were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 41s) Loss: 0.6906(0.6906) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.6587(0.6081) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 16s (remain 0m 6s) Loss: 0.6525(0.6015) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.4909(0.6046) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5694(0.5694) 


Epoch 1 - avg_train_loss: 0.6046  avg_val_loss: 0.5902  time: 26s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6046  avg_val_loss: 0.5902  time: 26s
Epoch 1 - Score: 0.7143
INFO:__main__:Epoch 1 - Score: 0.7143
Epoch 1 - Save Best Score: 0.7143 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7143 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6703(0.5902) 
f1 score : 0.12048192771084337
recall score : 0.06578947368421052
precision score : 0.7142857142857143
thresh : 0.46
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 32s) Loss: 0.5448(0.5448) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5402(0.5130) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.5194(0.5095) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.5098(0.5010) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5504(0.5504) 


Epoch 2 - avg_train_loss: 0.5010  avg_val_loss: 0.5747  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5010  avg_val_loss: 0.5747  time: 27s
Epoch 2 - Score: 0.7042
INFO:__main__:Epoch 2 - Score: 0.7042


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6733(0.5747) 
f1 score : 0.4689655172413793
recall score : 0.4473684210526316
precision score : 0.4927536231884058
thresh : 0.62
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 37s) Loss: 0.3586(0.3586) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.1538(0.2494) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.3470(0.2340) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0909(0.2222) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4867(0.4867) 


Epoch 3 - avg_train_loss: 0.2222  avg_val_loss: 0.6732  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2222  avg_val_loss: 0.6732  time: 27s
Epoch 3 - Score: 0.7163
INFO:__main__:Epoch 3 - Score: 0.7163
Epoch 3 - Save Best Score: 0.7163 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7163 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.6657(0.6732) 
f1 score : 0.36440677966101687
recall score : 0.28289473684210525
precision score : 0.5119047619047619
thresh : 0.59
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.0642(0.0642) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0365(0.0402) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0381(0.0375) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0187(0.0342) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5663(0.5663) 


Epoch 4 - avg_train_loss: 0.0342  avg_val_loss: 0.7943  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0342  avg_val_loss: 0.7943  time: 27s
Epoch 4 - Score: 0.7082
INFO:__main__:Epoch 4 - Score: 0.7082


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.8306(0.7943) 
f1 score : 0.4153846153846154
recall score : 0.35526315789473684
precision score : 0.5
thresh : 0.72
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 37s) Loss: 0.0144(0.0144) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0152(0.0150) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0133(0.0148) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0172(0.0146) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.6176(0.6176) 


Epoch 5 - avg_train_loss: 0.0146  avg_val_loss: 0.8473  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0146  avg_val_loss: 0.8473  time: 27s
Epoch 5 - Score: 0.7082
INFO:__main__:Epoch 5 - Score: 0.7082


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.9337(0.8473) 
f1 score : 0.4
recall score : 0.32894736842105265
precision score : 0.5102040816326531
thresh : 0.73


Score: 0.6982
INFO:__main__:Score: 0.6982
ACC BEST Score: 0.7163
INFO:__main__:ACC BEST Score: 0.7163
RobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_muppet_epoch10",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.28.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_siz

f1 score : 0.36440677966101687
recall score : 0.28289473684210525
precision score : 0.5119047619047619
thresh : 0.59


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_muppet_epoch10 were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 43s) Loss: 0.6939(0.6939) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.5799(0.6174) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 16s (remain 0m 6s) Loss: 0.8062(0.6124) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.4426(0.6048) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6735(0.6735) 


Epoch 1 - avg_train_loss: 0.6048  avg_val_loss: 0.6093  time: 26s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6048  avg_val_loss: 0.6093  time: 26s
Epoch 1 - Score: 0.7143
INFO:__main__:Epoch 1 - Score: 0.7143
Epoch 1 - Save Best Score: 0.7143 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7143 Model


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5427(0.6093) 
f1 score : 0.13095238095238093
recall score : 0.07236842105263158
precision score : 0.6875
thresh : 0.49
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 35s) Loss: 0.5718(0.5718) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.4143(0.5034) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.3247(0.4967) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 24s (remain 0m 0s) Loss: 0.5698(0.4982) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6403(0.6403) 


Epoch 2 - avg_train_loss: 0.4982  avg_val_loss: 0.5917  time: 27s
INFO:__main__:Epoch 2 - avg_train_loss: 0.4982  avg_val_loss: 0.5917  time: 27s
Epoch 2 - Score: 0.7123
INFO:__main__:Epoch 2 - Score: 0.7123


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.5772(0.5917) 
f1 score : 0.2959641255605381
recall score : 0.21710526315789475
precision score : 0.4647887323943662
thresh : 0.63
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 30s) Loss: 0.3441(0.3441) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.2550(0.2457) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.1979(0.2272) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.1009(0.2158) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7547(0.7547) 


Epoch 3 - avg_train_loss: 0.2158  avg_val_loss: 0.7054  time: 27s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2158  avg_val_loss: 0.7054  time: 27s
Epoch 3 - Score: 0.6982
INFO:__main__:Epoch 3 - Score: 0.6982


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.7038(0.7054) 
f1 score : 0.3686274509803922
recall score : 0.3092105263157895
precision score : 0.4563106796116505
thresh : 0.78
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 34s) Loss: 0.0551(0.0551) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0263(0.0426) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0160(0.0365) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0173(0.0343) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.8203(0.8203) 


Epoch 4 - avg_train_loss: 0.0343  avg_val_loss: 0.8132  time: 27s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0343  avg_val_loss: 0.8132  time: 27s
Epoch 4 - Score: 0.6982
INFO:__main__:Epoch 4 - Score: 0.6982


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.8409(0.8132) 
f1 score : 0.3319148936170213
recall score : 0.2565789473684211
precision score : 0.46987951807228917
thresh : 0.72
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.0135(0.0135) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 8s (remain 0m 15s) Loss: 0.0150(0.0156) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 17s (remain 0m 6s) Loss: 0.0130(0.0158) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 23s (remain 0m 0s) Loss: 0.0114(0.0154) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.8491(0.8491) 


Epoch 5 - avg_train_loss: 0.0154  avg_val_loss: 0.8560  time: 27s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0154  avg_val_loss: 0.8560  time: 27s
Epoch 5 - Score: 0.6962
INFO:__main__:Epoch 5 - Score: 0.6962


EVAL: [15/16] Elapsed 0m 2s (remain 0m 0s) Loss: 0.8850(0.8560) 
f1 score : 0.3389830508474576
recall score : 0.2631578947368421
precision score : 0.47619047619047616
thresh : 0.75


Score: 0.7062
INFO:__main__:Score: 0.7062
ACC BEST Score: 0.7143
INFO:__main__:ACC BEST Score: 0.7143
Score: 0.7057
INFO:__main__:Score: 0.7057
ACC BEST Score: 0.7071
INFO:__main__:ACC BEST Score: 0.7071


f1 score : 0.13095238095238093
recall score : 0.07236842105263158
precision score : 0.6875
thresh : 0.49
f1 score : 0.2545824847250509
recall score : 0.16414970453053185
precision score : 0.5668934240362812
thresh : 0.72


In [22]:
from google.colab import runtime
runtime.unassign()