In [1]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
!pip install transformers
!pip install datasets
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m52.1 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m105.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.28.1
Looking in indexes: https://pypi.org/simple, https:/

In [3]:
!nvidia-smi

Sat May  6 10:47:14 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P0    46W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:

import os
import gc
import math
import time
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
from tqdm import tqdm
import re
import html

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam, SGD, AdamW, RAdam
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import StratifiedKFold,StratifiedGroupKFold,GroupKFold
from sklearn.metrics import log_loss,f1_score, recall_score, accuracy_score, precision_score

from transformers import AutoModel, AutoConfig, AutoTokenizer, AdamW, DataCollatorWithPadding
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
DIR = "/content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測"
INPUT_DIR = os.path.join(DIR,"input")
OUTPUT_DIR = os.path.join(DIR,"output")
CUSTOM_MODEL_DIR = os.path.join(OUTPUT_DIR,'clrp_albert_base_v2_epoch10')
OUTPUT_EXP_DIR = DIR + '/output/EXP071/'
if not os.path.exists(OUTPUT_EXP_DIR):
    os.makedirs(OUTPUT_EXP_DIR)

In [6]:

# ====================================================
# CFG
# ====================================================
class CFG:
    debug=False
    apex=True
    print_freq=100
    num_workers=4
    model_name="albert-base-v2"
    # model='microsoft/deberta-base'
    # model='roberta-base'
    # model='roberta-large'
    # model='roberta-large-mnli'
    # model='xlnet-large-cased'
    # model='albert-xxlarge-v2'
    # model="microsoft/deberta-large"
    # model="microsoft/deberta-v3-large"
    # model='microsoft/deberta-v2-xlarge'
    # model='funnel-transformer/large'
    # model='funnel-transformer/medium'
    # model='albert-base-v2'
    # model='albert-large-v2'
    # model='google/electra-large-discriminator'
    # model='google/electra-base-discriminator'
    # model="facebook/bart-large-mnli"
    # model="facebook/bart-large"
    # model="facebook/bart-base"
    model = CUSTOM_MODEL_DIR
    scheduler='cosine' # ['linear', 'cosine']
    batch_scheduler=True
    num_cycles=0.5
    num_warmup_steps=0
    epochs=5
    encoder_lr=2e-5
    decoder_lr=2e-5
    min_lr=1e-6
    eps=1e-6
    betas=(0.9, 0.999)
    batch_size=16
    fc_dropout=0.2
    target_size=1
    max_len=512
    weight_decay=0.01
    gradient_accumulation_steps=1
    max_grad_norm=1000
    seed=42
    n_fold=10
    trn_fold=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    train=True
    nth_awp_start_epoch=1
    gradient_checkpointing = False
    freezing = False
    num_reinit_layers = 1
    is_reinit_layer = False
    fgm = False
    awp_start=1

if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0, 1]

In [7]:
def get_score(labels, outputs):
    thresh = 0.5
    y_pred = outputs
    y_true = labels
    f_score = f1_score(y_true, (y_pred>thresh).astype(int))
    r_score = recall_score(y_true, (y_pred>thresh).astype(int))
    p_score = precision_score(y_true, (y_pred>thresh).astype(int))
    print(f"f1 score : {f_score}")
    print(f"recall score : {r_score}")
    print(f"precision score : {p_score}")
    return accuracy_score(y_true, (y_pred>thresh).astype(int))

def get_acc_score(labels, outputs):
    y_pred = outputs
    y_true = labels
    best_score = 0
    best_thresh = 0.5
    for thresh in np.arange(0.1, 0.80, 0.01):
        thresh = np.round(thresh, 2)
        score = accuracy_score(y_true, (y_pred>thresh).astype(int))
        #print("Accuracy score at threshold {0} is {1}".format(thresh, score))
        if score > best_score:
          best_score = score
          best_thresh = thresh
    print(f"thresh : {best_thresh}")
    return accuracy_score(y_true, (y_pred>best_thresh).astype(int))


def get_logger(filename=OUTPUT_EXP_DIR+'train'):
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=CFG.seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=CFG.seed)

In [8]:
def freeze(module):
    """
    Freezes module's parameters.
    """
    
    for parameter in module.parameters():
        parameter.requires_grad = False
        
def get_freezed_parameters(module):
    """
    Returns names of freezed parameters of the given module.
    """
    
    freezed_parameters = []
    for name, parameter in module.named_parameters():
        if not parameter.requires_grad:
            freezed_parameters.append(name)
            
    return freezed_parameters

def set_embedding_parameters_bits(embeddings_path, optim_bits=32):
    """
    https://github.com/huggingface/transformers/issues/14819#issuecomment-1003427930
    """
    
    embedding_types = ("word", "position", "token_type")
    for embedding_type in embedding_types:
        attr_name = f"{embedding_type}_embeddings"
        
        if hasattr(embeddings_path, attr_name): 
            bnb.optim.GlobalOptimManager.get_instance().register_module_override(
                getattr(embeddings_path, attr_name), 'weight', {'optim_bits': optim_bits}
            )

In [9]:
import pandas as pd
import numpy as np

train = pd.read_csv(os.path.join(INPUT_DIR,"train_data.csv"))
test = pd.read_csv(os.path.join(INPUT_DIR,"test_data.csv"))
sample_sub = pd.read_csv(os.path.join(INPUT_DIR,"submission.csv"))

train = train.sample(frac=1, random_state=CFG.seed).reset_index()

print(train.shape)
display(train.head(3))

print(test.shape)
display(test.head(3))

print(sample_sub.shape)
display(sample_sub.head(3))

(4974, 7)


Unnamed: 0,index,id,title,year,abstract,keywords,y
0,721,722,Global Optimality Conditions for Deep Neural N...,2018,We study the error landscape of deep linear an...,"deep linear neural networks, global optimality...",1
1,144,145,Multi-Task Learning by Deep Collaboration and ...,2018,Convolutional neural networks (CNN) have becom...,"multi-task learning, soft parameter sharing, f...",0
2,4542,4543,On the Need for Topology-Aware Generative Mode...,2020,"ML algorithms or models, especially deep neura...","Manifold-based Defense, Robust Learning, Adver...",1


(6393, 5)


Unnamed: 0,id,title,year,abstract,keywords
0,1,StyleAlign: Analysis and Applications of Align...,2022,"In this paper, we perform an in-depth study of...","StyleGAN, transfer learning, fine tuning, mode..."
1,2,Embedding a random graph via GNN: mean-field i...,2021,We develop a theory for embedding a random gra...,"Graph neural network, graph embedding, multi-r..."
2,3,BBRefinement: an universal scheme to improve p...,2021,We present a conceptually simple yet powerful ...,"object detection, deep neural networks, refine..."


(6393, 2)


Unnamed: 0,id,y
0,1,0
1,2,0
2,3,0


In [10]:
train["texts"] = train["title"] + "[SEP]" + train["abstract"] 

In [11]:
skf = StratifiedKFold(n_splits=CFG.n_fold,shuffle=True,random_state=CFG.seed)
for fold, ( _, val_) in enumerate(skf.split(train, train.y)):
    train.loc[val_ , "kfold"] = int(fold)
    
train["kfold"] = train["kfold"].astype(int)

if CFG.debug:
    display(train.groupby('kfold').size())
    train = train.sample(n=500, random_state=0).reset_index(drop=True)
    display(train.groupby('kfold').size())

In [12]:
# ====================================================
# tokenizer
# ====================================================
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(OUTPUT_EXP_DIR+'tokenizer/')
CFG.tokenizer = tokenizer

In [13]:
# ====================================================
# Define max_len
# ====================================================
#lengths = []
#tk0 = tqdm(train['texts'].fillna("").values, total=len(train))
#for text in tk0:
#    length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
#    lengths.append(length)
#CFG.max_len = max(lengths) + 3 # cls + sep + sep
#LOGGER.info(f"max_len: {CFG.max_len}")

In [14]:
class AWP:
    def __init__(self, model, optimizer, *, adv_param='weight',
                 adv_lr=0.001, adv_eps=0.001):
        self.model = model
        self.optimizer = optimizer
        self.adv_param = adv_param
        self.adv_lr = adv_lr
        self.adv_eps = adv_eps
        self.backup = {}

    def perturb(self, inputs, y, criterion):
        """
        Perturb model parameters for AWP gradient
        Call before loss and loss.backward()
        """
        self._save()  # save model parameters
        self._attack_step()  # perturb weights

    def _attack_step(self):
        e = 1e-6
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None and self.adv_param in name:
                grad = self.optimizer.state[param]['exp_avg']
                norm_grad = torch.norm(grad)
                norm_data = torch.norm(param.detach())

                if norm_grad != 0 and not torch.isnan(norm_grad):
                    # Set lower and upper limit in change
                    limit_eps = self.adv_eps * param.detach().abs()
                    param_min = param.data - limit_eps
                    param_max = param.data + limit_eps

                    # Perturb along gradient
                    # w += (adv_lr * |w| / |grad|) * grad
                    param.data.add_(grad, alpha=(self.adv_lr * (norm_data + e) / (norm_grad + e)))

                    # Apply the limit to the change
                    param.data.clamp_(param_min, param_max)

    def _save(self):
        for name, param in self.model.named_parameters():
            if param.requires_grad and param.grad is not None and self.adv_param in name:
                if name not in self.backup:
                    self.backup[name] = param.clone().detach()
                else:
                    self.backup[name].copy_(param.data)

    def restore(self):
        """
        Restore model parameter to correct position; AWP do not perturbe weights, it perturb gradients
        Call after loss.backward(), before optimizer.step()
        """
        for name, param in self.model.named_parameters():
            if name in self.backup:
                param.data.copy_(self.backup[name])

In [15]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text):
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           return_offsets_mapping=False,
                           truncation=True)
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs


class TrainDataset(Dataset):
    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        self.labels = df['y'].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        inputs = prepare_input(self.cfg, self.inputs[item])
        label = torch.tensor(self.labels[item], dtype=torch.float)
        return inputs, label

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

#collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [16]:
def reinit_layers(model):

    #for layer in model.model.encoder.layer[-CFG.num_reinit_layers:]:
    for layer in model.encoder.layer[-CFG.num_reinit_layers:]:    #Custome model内(backbone)

            for module in layer.modules():

                if isinstance(module,nn.Linear):
                    module.weight.data.normal_(mean=0.0,std=model.config.initializer_range)
                    if module.bias is not None:
                            module.bias.data.zero_()
                elif isinstance(module, nn.Embedding):
                        module.weight.data.normal_(mean=0.0, std=model.config.initializer_range)
                        if module.padding_idx is not None:
                            module.weight.data[module.padding_idx].zero_()
                elif isinstance(module, nn.LayerNorm):
                        module.bias.data.zero_()
                        module.weight.data.fill_(1.0)
                        
    return model

In [17]:
# ====================================================
# Model
# ====================================================
class MeanPooling(nn.Module):
    def __init__(self):
        super(MeanPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, 1)
        sum_mask = input_mask_expanded.sum(1)
        sum_mask = torch.clamp(sum_mask, min=1e-9)
        mean_embeddings = sum_embeddings / sum_mask
        return mean_embeddings

class MaxPooling(nn.Module):
    def __init__(self):
        super(MaxPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        embeddings = last_hidden_state.clone()
        embeddings[input_mask_expanded == 0] = -1e4
        max_embeddings, _ = torch.max(embeddings, dim=1)
        return max_embeddings
    

class CustomModel(nn.Module):
    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
            self.config.hidden_dropout = 0.
            self.config.hidden_dropout_prob = 0.
            self.config.attention_dropout = 0.
            self.config.attention_probs_dropout_prob = 0.
            LOGGER.info(self.config)
        else:
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            self.model = AutoModel(self.config)
        if CFG.is_reinit_layer:
            self.model = reinit_layers(self.model)
            print(f'Reinitializing Last {CFG.num_reinit_layers} Layers.')
        if self.cfg.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        # Freezing
        if cfg.freezing:
            # freezing embeddings and first 2 layers of encoder
            freeze((self.model).embeddings)
            freeze((self.model).encoder.layer[:2])
            cfg.after_freezed_parameters = filter(lambda parameter: parameter.requires_grad, (self.model).parameters())

        self.pool = MaxPooling()
        self.fc = nn.Linear(self.config.hidden_size, cfg.target_size)
        self._init_weights(self.fc)
        self.layer_norm1 = nn.LayerNorm(self.config.hidden_size)
        
    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        
    def feature(self, inputs):
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        feature = self.pool(last_hidden_states, inputs['attention_mask'])
        return feature

    def forward(self, inputs):
        feature = self.feature(inputs)
        feature = self.layer_norm1(feature)
        output = self.fc(feature)
        return output

In [18]:
def calculate_loss(inputs, labels, model, criterion, is_valid=True, device="cpu"):    
    y_preds = model(inputs)
    loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
    return (loss, y_preds) if is_valid else loss

In [19]:

# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device, awp):
    model.zero_grad()
    model.train()
    awp_start = CFG.awp_start
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = end = time.time()
    global_step = 0
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        if epoch >= awp_start:
            awp.perturb(inputs, labels, criterion)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            loss = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=False, device=device)
        #print(y_preds.sigmoid().squeeze().view(1, -1))
        #loss = criterion(y_preds.sigmoid().squeeze(), labels.squeeze())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        if scaler is not None:
            scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        awp.restore()
        if CFG.fgm:
          fgm.attack() 
          adversarial_loss = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=False, device=device)
          scaler.scale(adversarial_loss).backward()
          fgm.restore()
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            global_step += 1
            if CFG.batch_scheduler:
                scheduler.step()
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  #'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader), 
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          #grad_norm=grad_norm,
                          lr=scheduler.get_lr()[0]))

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    losses = AverageMeter()
    model.eval()
    preds = []
    start = end = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            loss, y_preds = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=True, device=device)
        #loss = criterion(y_preds.sigmoid().squeeze(), labels.squeeze())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    predictions = np.concatenate(preds)
    predictions = np.concatenate(predictions)
    return losses.avg, predictions


def inference_fn(test_loader, model, device):
    preds = []
    model.eval()
    model.to(device)
    tk0 = tqdm(test_loader, total=len(test_loader))
    for inputs in tk0:
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        with torch.no_grad():
            y_preds = model(inputs)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
    predictions = np.concatenate(preds)
    return predictions

In [20]:

# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
    
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['kfold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['kfold'] == fold].reset_index(drop=True)
    valid_labels = valid_folds['y'].values
    
    train_dataset = TrainDataset(CFG, train_folds)
    valid_dataset = TrainDataset(CFG, valid_folds)


    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size*2,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, config_path=None, pretrained=True)
    torch.save(model.config, OUTPUT_EXP_DIR+'config.pth')
    model.to(device)
    
    def get_optimizer_params(model, encoder_lr=5e-6, decoder_lr=1e-4, weight_decay=0.0):
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", 
                    "LayerNorm.weight"]
        group1=['layer.0.','layer.1.','layer.2.','layer.3.']
        group2=['layer.4.','layer.5.','layer.6.','layer.7.']    
        group3=['layer.8.','layer.9.','layer.10.','layer.11.']
        group_all=['layer.0.','layer.1.','layer.2.','layer.3.','layer.4.','layer.5.','layer.6.','layer.7.','layer.8.','layer.9.','layer.10.','layer.11.']
        optimizer_parameters = [
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': weight_decay},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': weight_decay, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': weight_decay, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': weight_decay, 'lr': encoder_lr*2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': 0.0},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': 0.0, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': 0.0, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': 0.0, 'lr': encoder_lr*2.6},
        {'params': [p for n, p in model.named_parameters() if "model" not in n], 'lr':decoder_lr, "momentum" : 0.99},
    ]
        return optimizer_parameters

    optimizer_parameters = get_optimizer_params(model,
                                                encoder_lr=CFG.encoder_lr, 
                                                decoder_lr=CFG.decoder_lr,
                                                weight_decay=CFG.weight_decay)
    optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)
    
    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        if cfg.scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif cfg.scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
            )
        return scheduler
    
    num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss()

    print('Enable AWP')
    awp = AWP(model, optimizer, adv_lr=0.001, adv_eps=0.001)
    #print('Enable FGM')
    #fgm = FGM(model=model, eps=0.1)
    
    best_score = -1.

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device, awp)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)
        
        # scoring
        score_05 = get_score(valid_labels, predictions)
        score = get_acc_score(valid_labels, predictions)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        
        if best_score < score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth")

    predictions = torch.load(OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth", 
                             map_location=torch.device('cpu'))['predictions']
    valid_folds['pred'] = predictions

    torch.cuda.empty_cache()
    gc.collect()
    
    return valid_folds

In [21]:
if __name__ == '__main__':
    
    def get_result(oof_df):
        labels = oof_df['y'].values
        preds = oof_df['pred'].values
        score = get_score(labels, preds)
        acc_score = get_acc_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}')
        LOGGER.info(f'ACC BEST Score: {acc_score:<.4f}')
    
    if CFG.train:
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                oof_df = pd.concat([oof_df, _oof_df])
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
            #break
        oof_df = oof_df.reset_index(drop=True)
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        oof_df.to_pickle(OUTPUT_EXP_DIR+'oof_df.pkl')

AlbertConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_albert_base_v2_epoch10",
  "architectures": [
    "AlbertForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu_new",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_blocks": 0,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transfor

Enable AWP
Epoch: [1][0/279] Elapsed 0m 3s (remain 17m 48s) Loss: 1.0353(1.0353) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 13s (remain 0m 23s) Loss: 0.6139(0.6438) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 22s (remain 0m 8s) Loss: 0.6176(0.6236) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 30s (remain 0m 0s) Loss: 0.4637(0.6192) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5706(0.5706) 


Epoch 1 - avg_train_loss: 0.6192  avg_val_loss: 0.5954  time: 34s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6192  avg_val_loss: 0.5954  time: 34s
Epoch 1 - Score: 0.7088
INFO:__main__:Epoch 1 - Score: 0.7088
Epoch 1 - Save Best Score: 0.7088 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7088 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6343(0.5954) 
f1 score : 0.15999999999999998
recall score : 0.0915032679738562
precision score : 0.6363636363636364
thresh : 0.51
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 20s) Loss: 0.5793(0.5793) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.3716(0.5149) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.8298(0.5167) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.4764(0.5226) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5537(0.5537) 


Epoch 2 - avg_train_loss: 0.5226  avg_val_loss: 0.5947  time: 30s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5226  avg_val_loss: 0.5947  time: 30s
Epoch 2 - Score: 0.7169
INFO:__main__:Epoch 2 - Score: 0.7169
Epoch 2 - Save Best Score: 0.7169 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7169 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6662(0.5947) 
f1 score : 0.3054187192118227
recall score : 0.20261437908496732
precision score : 0.62
thresh : 0.5
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 19s) Loss: 0.5692(0.5692) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.4386(0.4047) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.2705(0.4014) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.4052(0.3992) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5118(0.5118) 


Epoch 3 - avg_train_loss: 0.3992  avg_val_loss: 0.5743  time: 30s
INFO:__main__:Epoch 3 - avg_train_loss: 0.3992  avg_val_loss: 0.5743  time: 30s
Epoch 3 - Score: 0.7209
INFO:__main__:Epoch 3 - Score: 0.7209
Epoch 3 - Save Best Score: 0.7209 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7209 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6712(0.5743) 
f1 score : 0.2842639593908629
recall score : 0.1830065359477124
precision score : 0.6363636363636364
thresh : 0.47
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 21s) Loss: 0.3877(0.3877) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.3334(0.2801) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.2546(0.2743) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.2526(0.2737) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4950(0.4950) 


Epoch 4 - avg_train_loss: 0.2737  avg_val_loss: 0.5780  time: 30s
INFO:__main__:Epoch 4 - avg_train_loss: 0.2737  avg_val_loss: 0.5780  time: 30s
Epoch 4 - Score: 0.7189
INFO:__main__:Epoch 4 - Score: 0.7189


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6827(0.5780) 
f1 score : 0.34579439252336447
recall score : 0.24183006535947713
precision score : 0.6065573770491803
thresh : 0.5
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 20s) Loss: 0.2701(0.2701) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.2169(0.2202) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.2213(0.2133) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.2633(0.2135) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4965(0.4965) 


Epoch 5 - avg_train_loss: 0.2135  avg_val_loss: 0.5826  time: 30s
INFO:__main__:Epoch 5 - avg_train_loss: 0.2135  avg_val_loss: 0.5826  time: 30s
Epoch 5 - Score: 0.7229
INFO:__main__:Epoch 5 - Score: 0.7229
Epoch 5 - Save Best Score: 0.7229 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7229 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7055(0.5826) 
f1 score : 0.297029702970297
recall score : 0.19607843137254902
precision score : 0.6122448979591837
thresh : 0.46


Score: 0.7149
INFO:__main__:Score: 0.7149
ACC BEST Score: 0.7229
INFO:__main__:ACC BEST Score: 0.7229
AlbertConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_albert_base_v2_epoch10",
  "architectures": [
    "AlbertForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu_new",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_blocks": 0,
  "output_hidden_states": true,

f1 score : 0.297029702970297
recall score : 0.19607843137254902
precision score : 0.6122448979591837
thresh : 0.46
Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 31s) Loss: 0.7632(0.7632) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.6585(0.6357) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.7034(0.6253) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.4578(0.6197) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4666(0.4666) 


Epoch 1 - avg_train_loss: 0.6197  avg_val_loss: 0.5859  time: 30s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6197  avg_val_loss: 0.5859  time: 30s
Epoch 1 - Score: 0.7149
INFO:__main__:Epoch 1 - Score: 0.7149
Epoch 1 - Save Best Score: 0.7149 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7149 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5170(0.5859) 
f1 score : 0.15476190476190477
recall score : 0.08496732026143791
precision score : 0.8666666666666667
thresh : 0.5
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 22s) Loss: 0.6010(0.6010) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.6045(0.5197) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.4739(0.5156) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.4060(0.5158) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4734(0.4734) 


Epoch 2 - avg_train_loss: 0.5158  avg_val_loss: 0.5924  time: 30s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5158  avg_val_loss: 0.5924  time: 30s
Epoch 2 - Score: 0.7108
INFO:__main__:Epoch 2 - Score: 0.7108


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5479(0.5924) 
f1 score : 0.18579234972677594
recall score : 0.1111111111111111
precision score : 0.5666666666666667
thresh : 0.58
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 21s) Loss: 0.3961(0.3961) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.4785(0.3849) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.3491(0.3819) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.3685(0.3794) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4754(0.4754) 


Epoch 3 - avg_train_loss: 0.3794  avg_val_loss: 0.6021  time: 30s
INFO:__main__:Epoch 3 - avg_train_loss: 0.3794  avg_val_loss: 0.6021  time: 30s
Epoch 3 - Score: 0.7108
INFO:__main__:Epoch 3 - Score: 0.7108


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5745(0.6021) 
f1 score : 0.3111111111111111
recall score : 0.22875816993464052
precision score : 0.4861111111111111
thresh : 0.61
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 20s) Loss: 0.2835(0.2835) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.2486(0.2470) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.3377(0.2456) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.3260(0.2439) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4212(0.4212) 


Epoch 4 - avg_train_loss: 0.2439  avg_val_loss: 0.6092  time: 30s
INFO:__main__:Epoch 4 - avg_train_loss: 0.2439  avg_val_loss: 0.6092  time: 30s
Epoch 4 - Score: 0.7088
INFO:__main__:Epoch 4 - Score: 0.7088


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5661(0.6092) 
f1 score : 0.25365853658536586
recall score : 0.16993464052287582
precision score : 0.5
thresh : 0.74
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 26s) Loss: 0.1367(0.1367) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.1940(0.1845) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.2708(0.1863) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.2114(0.1853) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 5s) Loss: 0.4126(0.4126) 


Epoch 5 - avg_train_loss: 0.1853  avg_val_loss: 0.6134  time: 30s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1853  avg_val_loss: 0.6134  time: 30s
Epoch 5 - Score: 0.7088
INFO:__main__:Epoch 5 - Score: 0.7088


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5644(0.6134) 
f1 score : 0.26
recall score : 0.16993464052287582
precision score : 0.5531914893617021
thresh : 0.74


Score: 0.7149
INFO:__main__:Score: 0.7149
ACC BEST Score: 0.7149
INFO:__main__:ACC BEST Score: 0.7149
AlbertConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_albert_base_v2_epoch10",
  "architectures": [
    "AlbertForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu_new",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_blocks": 0,
  "output_hidden_states": true,

f1 score : 0.15476190476190477
recall score : 0.08496732026143791
precision score : 0.8666666666666667
thresh : 0.5
Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 31s) Loss: 0.5912(0.5912) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.7902(0.6133) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.4361(0.6142) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.7162(0.6105) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5589(0.5589) 


Epoch 1 - avg_train_loss: 0.6105  avg_val_loss: 0.6000  time: 30s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6105  avg_val_loss: 0.6000  time: 30s
Epoch 1 - Score: 0.7068
INFO:__main__:Epoch 1 - Score: 0.7068
Epoch 1 - Save Best Score: 0.7068 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7068 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5040(0.6000) 
f1 score : 0.16666666666666669
recall score : 0.09803921568627451
precision score : 0.5555555555555556
thresh : 0.52
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 25s) Loss: 0.5383(0.5383) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 10s (remain 0m 17s) Loss: 0.5011(0.5212) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.4386(0.5304) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.4643(0.5272) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5474(0.5474) 


Epoch 2 - avg_train_loss: 0.5272  avg_val_loss: 0.5957  time: 30s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5272  avg_val_loss: 0.5957  time: 30s
Epoch 2 - Score: 0.7068
INFO:__main__:Epoch 2 - Score: 0.7068


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4257(0.5957) 
f1 score : 0.20430107526881722
recall score : 0.12418300653594772
precision score : 0.5757575757575758
thresh : 0.56
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 26s) Loss: 0.4698(0.4698) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.4818(0.4047) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.4148(0.3984) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.3552(0.3933) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5602(0.5602) 


Epoch 3 - avg_train_loss: 0.3933  avg_val_loss: 0.5912  time: 30s
INFO:__main__:Epoch 3 - avg_train_loss: 0.3933  avg_val_loss: 0.5912  time: 30s
Epoch 3 - Score: 0.7108
INFO:__main__:Epoch 3 - Score: 0.7108
Epoch 3 - Save Best Score: 0.7108 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7108 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3983(0.5912) 
f1 score : 0.3013698630136986
recall score : 0.21568627450980393
precision score : 0.5
thresh : 0.68
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 26s) Loss: 0.3016(0.3016) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.2198(0.2671) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.2925(0.2664) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.1902(0.2596) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5551(0.5551) 


Epoch 4 - avg_train_loss: 0.2596  avg_val_loss: 0.5986  time: 30s
INFO:__main__:Epoch 4 - avg_train_loss: 0.2596  avg_val_loss: 0.5986  time: 30s
Epoch 4 - Score: 0.7088
INFO:__main__:Epoch 4 - Score: 0.7088


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3764(0.5986) 
f1 score : 0.30275229357798167
recall score : 0.21568627450980393
precision score : 0.5076923076923077
thresh : 0.78
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 21s) Loss: 0.2253(0.2253) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.2236(0.2005) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.2032(0.1984) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.2421(0.1983) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5450(0.5450) 


Epoch 5 - avg_train_loss: 0.1983  avg_val_loss: 0.5995  time: 30s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1983  avg_val_loss: 0.5995  time: 30s
Epoch 5 - Score: 0.7088
INFO:__main__:Epoch 5 - Score: 0.7088


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3635(0.5995) 
f1 score : 0.2647058823529412
recall score : 0.17647058823529413
precision score : 0.5294117647058824
thresh : 0.7


Score: 0.6928
INFO:__main__:Score: 0.6928
ACC BEST Score: 0.7108
INFO:__main__:ACC BEST Score: 0.7108
AlbertConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_albert_base_v2_epoch10",
  "architectures": [
    "AlbertForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu_new",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_blocks": 0,
  "output_hidden_states": true,

f1 score : 0.3013698630136986
recall score : 0.21568627450980393
precision score : 0.5
thresh : 0.68
Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 34s) Loss: 0.9395(0.9395) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.7651(0.6204) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.5811(0.6132) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.6870(0.6090) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7221(0.7221) 


Epoch 1 - avg_train_loss: 0.6090  avg_val_loss: 0.6165  time: 30s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6090  avg_val_loss: 0.6165  time: 30s
Epoch 1 - Score: 0.7289
INFO:__main__:Epoch 1 - Score: 0.7289
Epoch 1 - Save Best Score: 0.7289 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7289 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5405(0.6165) 
f1 score : 0.025806451612903226
recall score : 0.013157894736842105
precision score : 0.6666666666666666
thresh : 0.26
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 26s) Loss: 0.3445(0.3445) LR: 0.00001808  
Epoch: [2][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.4822(0.4989) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.4994(0.5049) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.4931(0.5098) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6677(0.6677) 


Epoch 2 - avg_train_loss: 0.5098  avg_val_loss: 0.5783  time: 30s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5098  avg_val_loss: 0.5783  time: 30s
Epoch 2 - Score: 0.7229
INFO:__main__:Epoch 2 - Score: 0.7229


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5693(0.5783) 
f1 score : 0.2727272727272727
recall score : 0.17763157894736842
precision score : 0.5869565217391305
thresh : 0.52
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 18s) Loss: 0.3984(0.3984) LR: 0.00001309  
Epoch: [3][100/279] Elapsed 0m 10s (remain 0m 17s) Loss: 0.2768(0.3619) LR: 0.00001090  
Epoch: [3][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.3618(0.3670) LR: 0.00000866  
Epoch: [3][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.3523(0.3677) LR: 0.00000695  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6975(0.6975) 


Epoch 3 - avg_train_loss: 0.3677  avg_val_loss: 0.5837  time: 30s
INFO:__main__:Epoch 3 - avg_train_loss: 0.3677  avg_val_loss: 0.5837  time: 30s
Epoch 3 - Score: 0.7149
INFO:__main__:Epoch 3 - Score: 0.7149


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5491(0.5837) 
f1 score : 0.18784530386740333
recall score : 0.1118421052631579
precision score : 0.5862068965517241
thresh : 0.54
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 29s) Loss: 0.1879(0.1879) LR: 0.00000693  
Epoch: [4][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.2234(0.2312) LR: 0.00000488  
Epoch: [4][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.1890(0.2277) LR: 0.00000310  
Epoch: [4][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.2450(0.2270) LR: 0.00000194  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7094(0.7094) 


Epoch 4 - avg_train_loss: 0.2270  avg_val_loss: 0.5852  time: 30s
INFO:__main__:Epoch 4 - avg_train_loss: 0.2270  avg_val_loss: 0.5852  time: 30s
Epoch 4 - Score: 0.7149
INFO:__main__:Epoch 4 - Score: 0.7149


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5545(0.5852) 
f1 score : 0.26865671641791045
recall score : 0.17763157894736842
precision score : 0.5510204081632653
thresh : 0.55
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 28s) Loss: 0.2237(0.2237) LR: 0.00000193  
Epoch: [5][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.1316(0.1737) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.1672(0.1711) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.1742(0.1690) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.7046(0.7046) 


Epoch 5 - avg_train_loss: 0.1690  avg_val_loss: 0.5859  time: 30s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1690  avg_val_loss: 0.5859  time: 30s
Epoch 5 - Score: 0.7108
INFO:__main__:Epoch 5 - Score: 0.7108


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5552(0.5859) 
f1 score : 0.27586206896551724
recall score : 0.18421052631578946
precision score : 0.5490196078431373
thresh : 0.47


Score: 0.6968
INFO:__main__:Score: 0.6968
ACC BEST Score: 0.7289
INFO:__main__:ACC BEST Score: 0.7289
AlbertConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_albert_base_v2_epoch10",
  "architectures": [
    "AlbertForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu_new",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_blocks": 0,
  "output_hidden_states": true,

f1 score : 0.025806451612903226
recall score : 0.013157894736842105
precision score : 0.6666666666666666
thresh : 0.26
Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 29s) Loss: 0.7007(0.7007) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.7994(0.6150) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.5632(0.6092) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.6593(0.6073) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4951(0.4951) 


Epoch 1 - avg_train_loss: 0.6073  avg_val_loss: 0.5839  time: 30s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6073  avg_val_loss: 0.5839  time: 30s
Epoch 1 - Score: 0.7103
INFO:__main__:Epoch 1 - Score: 0.7103
Epoch 1 - Save Best Score: 0.7103 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7103 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5095(0.5839) 
f1 score : 0.08588957055214723
recall score : 0.046052631578947366
precision score : 0.6363636363636364
thresh : 0.39
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 24s) Loss: 0.5576(0.5576) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.4882(0.5322) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.4334(0.5262) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.4248(0.5249) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5225(0.5225) 


Epoch 2 - avg_train_loss: 0.5249  avg_val_loss: 0.5704  time: 30s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5249  avg_val_loss: 0.5704  time: 30s
Epoch 2 - Score: 0.7264
INFO:__main__:Epoch 2 - Score: 0.7264
Epoch 2 - Save Best Score: 0.7264 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7264 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4771(0.5704) 
f1 score : 0.2222222222222222
recall score : 0.13815789473684212
precision score : 0.5675675675675675
thresh : 0.46
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 21s) Loss: 0.4083(0.4083) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 10s (remain 0m 17s) Loss: 0.4129(0.3975) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.5458(0.3899) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.2556(0.3842) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5046(0.5046) 


Epoch 3 - avg_train_loss: 0.3842  avg_val_loss: 0.5655  time: 30s
INFO:__main__:Epoch 3 - avg_train_loss: 0.3842  avg_val_loss: 0.5655  time: 30s
Epoch 3 - Score: 0.7143
INFO:__main__:Epoch 3 - Score: 0.7143


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4406(0.5655) 
f1 score : 0.2222222222222222
recall score : 0.13815789473684212
precision score : 0.5675675675675675
thresh : 0.46
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 26s) Loss: 0.2250(0.2250) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.2559(0.2521) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.2839(0.2463) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.2051(0.2450) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5402(0.5402) 


Epoch 4 - avg_train_loss: 0.2450  avg_val_loss: 0.5654  time: 30s
INFO:__main__:Epoch 4 - avg_train_loss: 0.2450  avg_val_loss: 0.5654  time: 30s
Epoch 4 - Score: 0.7103
INFO:__main__:Epoch 4 - Score: 0.7103


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4483(0.5654) 
f1 score : 0.3605150214592275
recall score : 0.27631578947368424
precision score : 0.5185185185185185
thresh : 0.56
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 25s) Loss: 0.1855(0.1855) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.1583(0.1819) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.2041(0.1844) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.1896(0.1840) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5251(0.5251) 


Epoch 5 - avg_train_loss: 0.1840  avg_val_loss: 0.5673  time: 30s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1840  avg_val_loss: 0.5673  time: 30s
Epoch 5 - Score: 0.7082
INFO:__main__:Epoch 5 - Score: 0.7082


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4240(0.5673) 
f1 score : 0.2705314009661835
recall score : 0.18421052631578946
precision score : 0.509090909090909
thresh : 0.73


Score: 0.7042
INFO:__main__:Score: 0.7042
ACC BEST Score: 0.7264
INFO:__main__:ACC BEST Score: 0.7264
AlbertConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_albert_base_v2_epoch10",
  "architectures": [
    "AlbertForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu_new",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_blocks": 0,
  "output_hidden_states": true,

f1 score : 0.2222222222222222
recall score : 0.13815789473684212
precision score : 0.5675675675675675
thresh : 0.46
Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.5592(0.5592) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.6854(0.6141) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.5967(0.6092) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.4660(0.6060) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6100(0.6100) 


Epoch 1 - avg_train_loss: 0.6060  avg_val_loss: 0.5982  time: 30s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6060  avg_val_loss: 0.5982  time: 30s
Epoch 1 - Score: 0.7062
INFO:__main__:Epoch 1 - Score: 0.7062
Epoch 1 - Save Best Score: 0.7062 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7062 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6851(0.5982) 
f1 score : 0.14942528735632185
recall score : 0.08552631578947369
precision score : 0.5909090909090909
thresh : 0.69
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 26s) Loss: 0.6196(0.6196) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.5584(0.5185) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.6519(0.5207) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.5309(0.5226) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5994(0.5994) 


Epoch 2 - avg_train_loss: 0.5226  avg_val_loss: 0.6046  time: 30s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5226  avg_val_loss: 0.6046  time: 30s
Epoch 2 - Score: 0.7062
INFO:__main__:Epoch 2 - Score: 0.7062


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6516(0.6046) 
f1 score : 0.3968871595330739
recall score : 0.3355263157894737
precision score : 0.4857142857142857
thresh : 0.78
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 23s) Loss: 0.4173(0.4173) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.3303(0.4042) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.4511(0.3977) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.3260(0.3979) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6113(0.6113) 


Epoch 3 - avg_train_loss: 0.3979  avg_val_loss: 0.5892  time: 30s
INFO:__main__:Epoch 3 - avg_train_loss: 0.3979  avg_val_loss: 0.5892  time: 30s
Epoch 3 - Score: 0.7062
INFO:__main__:Epoch 3 - Score: 0.7062


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7305(0.5892) 
f1 score : 0.26262626262626265
recall score : 0.17105263157894737
precision score : 0.5652173913043478
thresh : 0.5
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 19s) Loss: 0.2820(0.2820) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 9s (remain 0m 16s) Loss: 0.2872(0.2800) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.2122(0.2722) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.4035(0.2667) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6360(0.6360) 


Epoch 4 - avg_train_loss: 0.2667  avg_val_loss: 0.5941  time: 30s
INFO:__main__:Epoch 4 - avg_train_loss: 0.2667  avg_val_loss: 0.5941  time: 30s
Epoch 4 - Score: 0.7062
INFO:__main__:Epoch 4 - Score: 0.7062


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7496(0.5941) 
f1 score : 0.29493087557603687
recall score : 0.21052631578947367
precision score : 0.49230769230769234
thresh : 0.75
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 18s) Loss: 0.2052(0.2052) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.2202(0.2063) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.2571(0.2061) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.2358(0.2068) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6441(0.6441) 


Epoch 5 - avg_train_loss: 0.2068  avg_val_loss: 0.5956  time: 30s
INFO:__main__:Epoch 5 - avg_train_loss: 0.2068  avg_val_loss: 0.5956  time: 30s
Epoch 5 - Score: 0.7062
INFO:__main__:Epoch 5 - Score: 0.7062


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7717(0.5956) 
f1 score : 0.2830188679245283
recall score : 0.19736842105263158
precision score : 0.5
thresh : 0.75


Score: 0.7022
INFO:__main__:Score: 0.7022
ACC BEST Score: 0.7062
INFO:__main__:ACC BEST Score: 0.7062
AlbertConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_albert_base_v2_epoch10",
  "architectures": [
    "AlbertForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu_new",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_blocks": 0,
  "output_hidden_states": true,

f1 score : 0.14942528735632185
recall score : 0.08552631578947369
precision score : 0.5909090909090909
thresh : 0.69
Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 30s) Loss: 0.6749(0.6749) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.6100(0.6241) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.5035(0.6206) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.6774(0.6137) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 5s) Loss: 0.6393(0.6393) 


Epoch 1 - avg_train_loss: 0.6137  avg_val_loss: 0.5913  time: 30s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6137  avg_val_loss: 0.5913  time: 30s
Epoch 1 - Score: 0.7082
INFO:__main__:Epoch 1 - Score: 0.7082
Epoch 1 - Save Best Score: 0.7082 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7082 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5256(0.5913) 
f1 score : 0.11627906976744186
recall score : 0.06578947368421052
precision score : 0.5
thresh : 0.57
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 22s) Loss: 0.7071(0.7071) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.6520(0.5264) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.7529(0.5197) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.3477(0.5220) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6528(0.6528) 


Epoch 2 - avg_train_loss: 0.5220  avg_val_loss: 0.5931  time: 30s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5220  avg_val_loss: 0.5931  time: 30s
Epoch 2 - Score: 0.7123
INFO:__main__:Epoch 2 - Score: 0.7123
Epoch 2 - Save Best Score: 0.7123 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7123 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4944(0.5931) 
f1 score : 0.12048192771084337
recall score : 0.06578947368421052
precision score : 0.7142857142857143
thresh : 0.55
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 29s) Loss: 0.3822(0.3822) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.5888(0.3962) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.3200(0.3925) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.6323(0.3888) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5903(0.5903) 


Epoch 3 - avg_train_loss: 0.3888  avg_val_loss: 0.5834  time: 30s
INFO:__main__:Epoch 3 - avg_train_loss: 0.3888  avg_val_loss: 0.5834  time: 30s
Epoch 3 - Score: 0.7163
INFO:__main__:Epoch 3 - Score: 0.7163
Epoch 3 - Save Best Score: 0.7163 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7163 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5061(0.5834) 
f1 score : 0.29523809523809524
recall score : 0.20394736842105263
precision score : 0.5344827586206896
thresh : 0.52
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.2004(0.2004) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 10s (remain 0m 17s) Loss: 0.3314(0.2639) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.2809(0.2581) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.3434(0.2560) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5730(0.5730) 


Epoch 4 - avg_train_loss: 0.2560  avg_val_loss: 0.5860  time: 30s
INFO:__main__:Epoch 4 - avg_train_loss: 0.2560  avg_val_loss: 0.5860  time: 30s
Epoch 4 - Score: 0.7143
INFO:__main__:Epoch 4 - Score: 0.7143


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4839(0.5860) 
f1 score : 0.26
recall score : 0.17105263157894737
precision score : 0.5416666666666666
thresh : 0.54
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 24s) Loss: 0.2037(0.2037) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 10s (remain 0m 17s) Loss: 0.1825(0.1926) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.1718(0.1967) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.2086(0.1958) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5644(0.5644) 


Epoch 5 - avg_train_loss: 0.1958  avg_val_loss: 0.5854  time: 30s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1958  avg_val_loss: 0.5854  time: 30s
Epoch 5 - Score: 0.7163
INFO:__main__:Epoch 5 - Score: 0.7163


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4840(0.5854) 
f1 score : 0.28712871287128716
recall score : 0.19078947368421054
precision score : 0.58
thresh : 0.51


Score: 0.7022
INFO:__main__:Score: 0.7022
ACC BEST Score: 0.7163
INFO:__main__:ACC BEST Score: 0.7163
AlbertConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_albert_base_v2_epoch10",
  "architectures": [
    "AlbertForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu_new",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_blocks": 0,
  "output_hidden_states": true,

f1 score : 0.29523809523809524
recall score : 0.20394736842105263
precision score : 0.5344827586206896
thresh : 0.52
Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.7931(0.7931) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 9s (remain 0m 16s) Loss: 0.8656(0.6177) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.5007(0.6165) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.3068(0.6064) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7811(0.7811) 


Epoch 1 - avg_train_loss: 0.6064  avg_val_loss: 0.6452  time: 30s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6064  avg_val_loss: 0.6452  time: 30s
Epoch 1 - Score: 0.7223
INFO:__main__:Epoch 1 - Score: 0.7223
Epoch 1 - Save Best Score: 0.7223 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7223 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.9506(0.6452) 
f1 score : 0.07594936708860758
recall score : 0.039473684210526314
precision score : 1.0
thresh : 0.25
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 26s) Loss: 0.5258(0.5258) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.6706(0.5261) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.5533(0.5195) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.6833(0.5159) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6905(0.6905) 


Epoch 2 - avg_train_loss: 0.5159  avg_val_loss: 0.5821  time: 30s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5159  avg_val_loss: 0.5821  time: 30s
Epoch 2 - Score: 0.7163
INFO:__main__:Epoch 2 - Score: 0.7163


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8108(0.5821) 
f1 score : 0.16470588235294117
recall score : 0.09210526315789473
precision score : 0.7777777777777778
thresh : 0.47
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 24s) Loss: 0.4087(0.4087) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.3541(0.3852) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.4033(0.3809) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.3853(0.3810) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7387(0.7387) 


Epoch 3 - avg_train_loss: 0.3810  avg_val_loss: 0.5914  time: 30s
INFO:__main__:Epoch 3 - avg_train_loss: 0.3810  avg_val_loss: 0.5914  time: 30s
Epoch 3 - Score: 0.7163
INFO:__main__:Epoch 3 - Score: 0.7163


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8585(0.5914) 
f1 score : 0.16568047337278105
recall score : 0.09210526315789473
precision score : 0.8235294117647058
thresh : 0.4
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 28s) Loss: 0.3518(0.3518) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.2514(0.2629) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.3280(0.2560) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.2321(0.2525) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7162(0.7162) 


Epoch 4 - avg_train_loss: 0.2525  avg_val_loss: 0.5694  time: 30s
INFO:__main__:Epoch 4 - avg_train_loss: 0.2525  avg_val_loss: 0.5694  time: 30s
Epoch 4 - Score: 0.7183
INFO:__main__:Epoch 4 - Score: 0.7183


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8067(0.5694) 
f1 score : 0.2577319587628866
recall score : 0.16447368421052633
precision score : 0.5952380952380952
thresh : 0.53
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 28s) Loss: 0.1723(0.1723) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.2057(0.1971) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.1856(0.1974) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.1954(0.1945) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.7185(0.7185) 


Epoch 5 - avg_train_loss: 0.1945  avg_val_loss: 0.5689  time: 30s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1945  avg_val_loss: 0.5689  time: 30s
Epoch 5 - Score: 0.7223
INFO:__main__:Epoch 5 - Score: 0.7223


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8139(0.5689) 
f1 score : 0.2577319587628866
recall score : 0.16447368421052633
precision score : 0.5952380952380952
thresh : 0.56


Score: 0.7062
INFO:__main__:Score: 0.7062
ACC BEST Score: 0.7223
INFO:__main__:ACC BEST Score: 0.7223
AlbertConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_albert_base_v2_epoch10",
  "architectures": [
    "AlbertForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu_new",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_blocks": 0,
  "output_hidden_states": true,

f1 score : 0.07594936708860758
recall score : 0.039473684210526314
precision score : 1.0
thresh : 0.25
Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 38s) Loss: 0.7532(0.7532) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 10s (remain 0m 17s) Loss: 0.5639(0.6169) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.4196(0.6029) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.6356(0.6079) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6907(0.6907) 


Epoch 1 - avg_train_loss: 0.6079  avg_val_loss: 0.6332  time: 30s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6079  avg_val_loss: 0.6332  time: 30s
Epoch 1 - Score: 0.7103
INFO:__main__:Epoch 1 - Score: 0.7103
Epoch 1 - Save Best Score: 0.7103 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7103 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7003(0.6332) 
f1 score : 0.462962962962963
recall score : 0.4934210526315789
precision score : 0.436046511627907
thresh : 0.58
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 24s) Loss: 0.5294(0.5294) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 10s (remain 0m 17s) Loss: 0.4951(0.5123) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.5146(0.5149) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.4675(0.5134) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6276(0.6276) 


Epoch 2 - avg_train_loss: 0.5134  avg_val_loss: 0.5633  time: 30s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5134  avg_val_loss: 0.5633  time: 30s
Epoch 2 - Score: 0.7264
INFO:__main__:Epoch 2 - Score: 0.7264
Epoch 2 - Save Best Score: 0.7264 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7264 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7315(0.5633) 
f1 score : 0.19999999999999998
recall score : 0.11842105263157894
precision score : 0.6428571428571429
thresh : 0.42
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.5507(0.5507) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.4744(0.3760) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.3878(0.3698) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.3166(0.3664) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6140(0.6140) 


Epoch 3 - avg_train_loss: 0.3664  avg_val_loss: 0.5530  time: 30s
INFO:__main__:Epoch 3 - avg_train_loss: 0.3664  avg_val_loss: 0.5530  time: 30s
Epoch 3 - Score: 0.7364
INFO:__main__:Epoch 3 - Score: 0.7364
Epoch 3 - Save Best Score: 0.7364 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7364 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7162(0.5530) 
f1 score : 0.2857142857142857
recall score : 0.17763157894736842
precision score : 0.7297297297297297
thresh : 0.52
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 25s) Loss: 0.2553(0.2553) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.2054(0.2343) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.2946(0.2358) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.1989(0.2335) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6142(0.6142) 


Epoch 4 - avg_train_loss: 0.2335  avg_val_loss: 0.5509  time: 30s
INFO:__main__:Epoch 4 - avg_train_loss: 0.2335  avg_val_loss: 0.5509  time: 30s
Epoch 4 - Score: 0.7384
INFO:__main__:Epoch 4 - Score: 0.7384
Epoch 4 - Save Best Score: 0.7384 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7384 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6952(0.5509) 
f1 score : 0.35121951219512193
recall score : 0.23684210526315788
precision score : 0.6792452830188679
thresh : 0.51
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 23s) Loss: 0.1799(0.1799) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 10s (remain 0m 17s) Loss: 0.1633(0.1780) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.2033(0.1767) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.1755(0.1765) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.6135(0.6135) 


Epoch 5 - avg_train_loss: 0.1765  avg_val_loss: 0.5518  time: 30s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1765  avg_val_loss: 0.5518  time: 30s
Epoch 5 - Score: 0.7364
INFO:__main__:Epoch 5 - Score: 0.7364


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7012(0.5518) 
f1 score : 0.357487922705314
recall score : 0.24342105263157895
precision score : 0.6727272727272727
thresh : 0.53


Score: 0.7324
INFO:__main__:Score: 0.7324
ACC BEST Score: 0.7384
INFO:__main__:ACC BEST Score: 0.7384
AlbertConfig {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_albert_base_v2_epoch10",
  "architectures": [
    "AlbertForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu_new",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "albert",
  "net_structure_type": 0,
  "num_attention_heads": 12,
  "num_hidden_groups": 1,
  "num_hidden_layers": 12,
  "num_memory_blocks": 0,
  "output_hidden_states": true,

f1 score : 0.35121951219512193
recall score : 0.23684210526315788
precision score : 0.6792452830188679
thresh : 0.51
Enable AWP
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.7228(0.7228) LR: 0.00002000  
Epoch: [1][100/279] Elapsed 0m 10s (remain 0m 18s) Loss: 0.6488(0.6287) LR: 0.00001974  
Epoch: [1][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.6155(0.6055) LR: 0.00001900  
Epoch: [1][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.8114(0.6062) LR: 0.00001810  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5802(0.5802) 


Epoch 1 - avg_train_loss: 0.6062  avg_val_loss: 0.5893  time: 30s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6062  avg_val_loss: 0.5893  time: 30s
Epoch 1 - Score: 0.7103
INFO:__main__:Epoch 1 - Score: 0.7103
Epoch 1 - Save Best Score: 0.7103 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7103 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4786(0.5893) 
f1 score : 0.09876543209876543
recall score : 0.05263157894736842
precision score : 0.8
thresh : 0.46
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 21s) Loss: 0.7180(0.7180) LR: 0.00001809  
Epoch: [2][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.8873(0.5252) LR: 0.00001657  
Epoch: [2][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.5460(0.5193) LR: 0.00001473  
Epoch: [2][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.5285(0.5184) LR: 0.00001312  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5892(0.5892) 


Epoch 2 - avg_train_loss: 0.5184  avg_val_loss: 0.5801  time: 30s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5184  avg_val_loss: 0.5801  time: 30s
Epoch 2 - Score: 0.7143
INFO:__main__:Epoch 2 - Score: 0.7143
Epoch 2 - Save Best Score: 0.7143 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7143 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4464(0.5801) 
f1 score : 0.17045454545454547
recall score : 0.09868421052631579
precision score : 0.625
thresh : 0.44
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.4012(0.4012) LR: 0.00001310  
Epoch: [3][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.3177(0.3868) LR: 0.00001091  
Epoch: [3][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.4572(0.3884) LR: 0.00000867  
Epoch: [3][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.2207(0.3854) LR: 0.00000696  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5522(0.5522) 


Epoch 3 - avg_train_loss: 0.3854  avg_val_loss: 0.5756  time: 30s
INFO:__main__:Epoch 3 - avg_train_loss: 0.3854  avg_val_loss: 0.5756  time: 30s
Epoch 3 - Score: 0.7203
INFO:__main__:Epoch 3 - Score: 0.7203
Epoch 3 - Save Best Score: 0.7203 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7203 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4390(0.5756) 
f1 score : 0.24083769633507854
recall score : 0.1513157894736842
precision score : 0.5897435897435898
thresh : 0.56
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 25s) Loss: 0.3033(0.3033) LR: 0.00000694  
Epoch: [4][100/279] Elapsed 0m 10s (remain 0m 17s) Loss: 0.2081(0.2502) LR: 0.00000490  
Epoch: [4][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.1643(0.2512) LR: 0.00000311  
Epoch: [4][278/279] Elapsed 0m 27s (remain 0m 0s) Loss: 0.3534(0.2493) LR: 0.00000195  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.5825(0.5825) 


Epoch 4 - avg_train_loss: 0.2493  avg_val_loss: 0.5751  time: 30s
INFO:__main__:Epoch 4 - avg_train_loss: 0.2493  avg_val_loss: 0.5751  time: 30s
Epoch 4 - Score: 0.7143
INFO:__main__:Epoch 4 - Score: 0.7143


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4669(0.5751) 
f1 score : 0.3119266055045872
recall score : 0.2236842105263158
precision score : 0.5151515151515151
thresh : 0.45
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 26s) Loss: 0.2122(0.2122) LR: 0.00000194  
Epoch: [5][100/279] Elapsed 0m 9s (remain 0m 17s) Loss: 0.2789(0.1930) LR: 0.00000082  
Epoch: [5][200/279] Elapsed 0m 19s (remain 0m 7s) Loss: 0.1185(0.1902) LR: 0.00000017  
Epoch: [5][278/279] Elapsed 0m 26s (remain 0m 0s) Loss: 0.1754(0.1880) LR: 0.00000000  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 5s) Loss: 0.5867(0.5867) 


Epoch 5 - avg_train_loss: 0.1880  avg_val_loss: 0.5761  time: 30s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1880  avg_val_loss: 0.5761  time: 30s
Epoch 5 - Score: 0.7203
INFO:__main__:Epoch 5 - Score: 0.7203


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4556(0.5761) 
f1 score : 0.3004694835680751
recall score : 0.21052631578947367
precision score : 0.5245901639344263
thresh : 0.44


Score: 0.7082
INFO:__main__:Score: 0.7082
ACC BEST Score: 0.7203
INFO:__main__:ACC BEST Score: 0.7203
Score: 0.7075
INFO:__main__:Score: 0.7075
ACC BEST Score: 0.7099
INFO:__main__:ACC BEST Score: 0.7099


f1 score : 0.24083769633507854
recall score : 0.1513157894736842
precision score : 0.5897435897435898
thresh : 0.56
f1 score : 0.22234099412079103
recall score : 0.1365725541694025
precision score : 0.5977011494252874
thresh : 0.53


In [22]:
from google.colab import runtime
runtime.unassign()