In [1]:
# Mount Google Drive at /content/drive; the input/output paths used by this notebook live under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [4]:
import os
import gc
import math
import time
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
from tqdm import tqdm
import re

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import StratifiedKFold,StratifiedGroupKFold,GroupKFold
from sklearn.metrics import log_loss,f1_score

from transformers import AutoModel, AutoConfig, AutoTokenizer, AdamW, DataCollatorWithPadding
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
# Google Drive locations: competition inputs, and everything this notebook writes.
INPUT_DIR = '/content/drive/MyDrive/Competitions/Signate/Student Cup 2022/input/'
OUTPUT_DIR = '/content/drive/MyDrive/Competitions/Signate/Student Cup 2022/output/'
OUTPUT_SUB_DIR = os.path.join(OUTPUT_DIR,'Submission')
OUTPUT_MODEL_DIR = os.path.join(OUTPUT_DIR,'Model/RoBERTa-large/')

# Create the output folders up front: logging.FileHandler and torch.save do not
# create missing parent directories, so a fresh Drive would otherwise fail later.
for _out_dir in (OUTPUT_DIR, OUTPUT_SUB_DIR, OUTPUT_MODEL_DIR):
    os.makedirs(_out_dir, exist_ok=True)

In [6]:
class CFG:
    """Notebook-wide hyperparameters and switches, read as class attributes.

    Attributes marked NOTE(review) are not read by any cell visible in this
    notebook; confirm before relying on them.
    """
    # --- experiment switches ---
    wandb = False                   # when True, train_loop calls wandb.log each epoch
    apex = True                     # NOTE(review): not read by any visible cell
    train = True                    # gate for the cross-validation loop in the main cell
    debug = False                   # NOTE(review): not read by any visible cell
    debug_ver2 = False              # NOTE(review): not read by any visible cell
    seed = 42                       # used for seeding and for the StratifiedKFold shuffle

    # --- model ---
    model = 'roberta-large'         # checkpoint name passed to AutoTokenizer / AutoModel
    max_len = 128                   # max_length used when tokenizing (with truncation)
    dropout = 0.2                   # NOTE(review): not read by any visible cell
    target_size=4                   # output width of the classification head
    gradient_checkpointing = False  # enable gradient checkpointing on the backbone
    freezing = True                 # freeze embeddings and the first 2 encoder layers

    # --- optimisation ---
    lr = 3e-5
    min_lr=1e-6                     # NOTE(review): not read by any visible cell
    weigth_decay = 0.01             # historical (misspelled) name, still read by train_loop
    weight_decay = weigth_decay     # correctly spelled alias of the same value
    epochs = 10
    batch_size = 16                 # training batch size; validation uses twice this
    n_accumulate=1                  # gradient-accumulation window, in batches
    num_workers = 2                 # DataLoader worker processes
    print_freq = 50                 # progress line every print_freq steps

    # --- LR schedule ---
    scheduler = 'cosine'            # 'linear' or 'cosine' (see get_scheduler)
    num_warmup_steps = 0
    num_cycles=0.5                  # forwarded to the cosine schedule

    # --- cross-validation ---
    n_splits = 5                    # number of StratifiedKFold splits
    n_fold = n_splits               # number of folds the main loop iterates over
    trn_fold = [0, 1, 2, 3, 4]      # folds that are actually trained

In [7]:
# Loss Func
def criterion(outputs, labels):
    """Mean cross-entropy between raw class scores `outputs` and integer class `labels`."""
    return F.cross_entropy(outputs, labels)

def softmax(z):
    """Row-wise softmax of a 2-D array.

    The row maximum is subtracted before exponentiating so large scores do not overflow.
    """
    assert len(z.shape) == 2
    row_max = np.max(z, axis=1, keepdims=True)      # keepdims so it broadcasts over columns
    exponentials = np.exp(z - row_max)
    row_totals = np.sum(exponentials, axis=1, keepdims=True)
    return exponentials / row_totals
"""
def get_score(y_true, y_pred):
    y_pred = softmax(y_pred)
    score = log_loss(y_true, y_pred)
    return round(score, 5)
"""
def get_score(outputs, labels):
    """Macro-averaged F1 of the arg-max class against the true labels.

    Args:
        outputs: per-class scores, one row per sample (array or nested list).
        labels: true class ids, one per sample.

    Returns:
        The macro F1 score as a float.
    """
    # Softmax is monotonic within a row, so the arg-max of the raw scores is the
    # predicted class; no normalisation (and no implicit softmax dim) is needed.
    predicted = np.argmax(np.asarray(outputs), axis=1)
    # sklearn's signature is f1_score(y_true, y_pred, ...).
    return f1_score(labels, predicted, average='macro')

def get_logger(filename=OUTPUT_DIR+'train'):
    """Return the module logger writing bare messages to the console and to `<filename>.log`.

    Args:
        filename: path of the log file without the ".log" suffix.

    Returns:
        The `logging.Logger` registered under `__name__`, at INFO level, with
        exactly one stream handler and one file handler attached.
    """
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    # getLogger returns the same object on every call; drop handlers left by an
    # earlier run of this cell so each message is not emitted several times.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=CFG.seed):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN behaviour.

    Args:
        seed: integer seed applied to every generator.
    """
    random.seed(seed)
    # Only affects Python processes started after this point; the running
    # interpreter's hash seed is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)            # every visible GPU, not just the current one
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False      # autotuning may pick different kernels per run

seed_everything(seed=CFG.seed)

def prepare_input(cfg, text, text_2=None):
    """Tokenize `text` (and optional `text_2`) to fixed length and return long tensors.

    Calls `cfg.tokenizer` with padding to `cfg.max_len`, truncation and special
    tokens, then converts every value of the returned mapping to a
    `torch.long` tensor in place.
    """
    encoded = cfg.tokenizer(
        text,
        text_2,
        truncation=True,
        max_length=cfg.max_len,
        add_special_tokens=True,
        padding="max_length",
    )

    for key in list(encoded.keys()):
        encoded[key] = torch.tensor(encoded[key], dtype=torch.long)

    return encoded

In [8]:
def freeze(module):
    """
    Turn off gradient tracking for every parameter of `module`.
    """
    for param in module.parameters():
        param.requires_grad_(False)
        
def get_freezed_parameters(module):
    """
    List the names of `module`'s parameters that do not require gradients.
    """
    return [
        name
        for name, parameter in module.named_parameters()
        if not parameter.requires_grad
    ]

def set_embedding_parameters_bits(embeddings_path, optim_bits=32):
    """
    Register an `optim_bits` override for the `weight` of each embedding table
    (`word_embeddings`, `position_embeddings`, `token_type_embeddings`) that
    `embeddings_path` actually has.

    https://github.com/huggingface/transformers/issues/14819#issuecomment-1003427930

    NOTE(review): `bnb` is not imported in any visible cell (presumably
    bitsandbytes); as written, the call fails with NameError as soon as one of
    the attributes exists. Confirm the import before using this helper.
    """
    attr_names = [f"{kind}_embeddings" for kind in ("word", "position", "token_type")]
    for attr_name in attr_names:
        if not hasattr(embeddings_path, attr_name):
            continue
        bnb.optim.GlobalOptimManager.get_instance().register_module_override(
            getattr(embeddings_path, attr_name), 'weight', {'optim_bits': optim_bits}
        )

In [9]:
# Read the competition tables from INPUT_DIR.
train, test, submission_df = (
    pd.read_csv(os.path.join(INPUT_DIR, csv_name))
    for csv_name in ('train.csv', 'test.csv', 'submit_sample.csv')
)

# Preview the first rows and the (rows, columns) shape of each split.
display(train.head())
print(train.shape)
display(test.head())
print(test.shape)

Unnamed: 0,id,description,jobflag
0,0,<li>Develop cutting-edge web applications that...,3
1,1,"<li> Designs and develops high quality, scalab...",3
2,2,<li>Functions as a point person for Network St...,4
3,3,"<li> Work on the technical design, development...",3
4,4,<li>Quantify the resources required for a task...,4


(1516, 3)


Unnamed: 0,id,description
0,1516,<li>Building decision-making models and propos...
1,1517,<li>Educate homeowners on the benefits of sola...
2,1518,"<li><span>Design, develop, document, and imple..."
3,1519,<li>Apply advanced technical expertise and ski...
4,1520,<li>Project manage and deliver against our roa...


(1517, 2)


In [10]:
def remove_tag(x):
    """Return `x` with every `<...>` span (shortest match up to the next `>`) removed."""
    return re.sub(r"<[^>]*?>", '', x)

def cleaning(texts):
    """Return a list holding each string of `texts` with its HTML tags removed."""
    # Replacing non-alphabetic characters with spaces was tried and is left disabled:
    # clean_punc = re.sub(r'[^a-zA-Z]', ' ', text)
    return [remove_tag(text) for text in texts]



from text_unidecode import unidecode
from typing import Dict, List, Tuple
import codecs

def replace_encoding_with_utf8(error: UnicodeError) -> Tuple[bytes, int]:
    """Codec error handler: emit the unencodable slice as UTF-8 bytes and resume after it."""
    failed_slice = error.object[error.start : error.end]
    return failed_slice.encode("utf-8"), error.end


def replace_decoding_with_cp1252(error: UnicodeError) -> Tuple[str, int]:
    """Codec error handler: decode the undecodable slice as cp1252 and resume after it."""
    failed_slice = error.object[error.start : error.end]
    return failed_slice.decode("cp1252"), error.end

# Register both handlers with `codecs` under their function names so they can be
# passed as the `errors=` argument of str.encode / bytes.decode.
codecs.register_error("replace_encoding_with_utf8", replace_encoding_with_utf8)
codecs.register_error("replace_decoding_with_cp1252", replace_decoding_with_cp1252)

def resolve_encodings_and_normalize(text: str) -> str:
    """Repair mixed utf-8 / cp1252 encoding damage, then transliterate with `unidecode`.

    The text is round-tripped through raw_unicode_escape -> utf-8 -> cp1252 -> utf-8;
    at each step the registered error handlers substitute the other encoding for
    any slice that fails instead of raising.
    """
    escaped_bytes = text.encode("raw_unicode_escape")
    first_pass = escaped_bytes.decode("utf-8", errors="replace_decoding_with_cp1252")
    cp1252_bytes = first_pass.encode("cp1252", errors="replace_encoding_with_utf8")
    repaired = cp1252_bytes.decode("utf-8", errors="replace_decoding_with_cp1252")
    return unidecode(repaired)

# Strip HTML tags from the descriptions, then write the encoding-repaired,
# transliterated text to a new `inputs` column.
train['description'] = cleaning(train['description'])
test['description'] = cleaning(test['description'])
train['inputs'] = train['description'].apply(resolve_encodings_and_normalize)
test['inputs'] = test['description'].apply(resolve_encodings_and_normalize)
#train['inputs'] = train['inputs'].str.strip()
#test['inputs'] = test['inputs'].str.strip()

# Rename jobflag -> label and make it zero-based. Guarded on the original column
# name so that re-running this cell does not subtract 1 a second time.
if 'jobflag' in train.columns:
    train = train.rename(columns = {"jobflag": "label"})
    train["label"] = train["label"] - 1
train

Unnamed: 0,id,description,label,inputs
0,0,Develop cutting-edge web applications that per...,2,Develop cutting-edge web applications that per...
1,1,"Designs and develops high quality, scalable a...",2,"Designs and develops high quality, scalable a..."
2,2,Functions as a point person for Network Strate...,3,Functions as a point person for Network Strate...
3,3,"Work on the technical design, development, re...",2,"Work on the technical design, development, re..."
4,4,Quantify the resources required for a task/pro...,3,Quantify the resources required for a task/pro...
...,...,...,...,...
1511,1511,"Support detailed reporting, statistical analys...",0,"Support detailed reporting, statistical analys..."
1512,1512,Collaborate with teams to support the ML techn...,1,Collaborate with teams to support the ML techn...
1513,1513,Work with executives and other business leade...,0,Work with executives and other business leade...
1514,1514,Leading design ideation sessions to ensure we ...,2,Leading design ideation sessions to ensure we ...


In [11]:
# Assign every training row to one stratified validation fold.
skf = StratifiedKFold(n_splits=CFG.n_splits,shuffle=True,random_state=CFG.seed)
# split() yields positional row indices, so collect the fold ids in a positional
# array instead of writing through the label-based .loc indexer; this also keeps
# the column integer-typed throughout.
fold_ids = np.full(len(train), -1, dtype=int)
for fold, ( _, val_) in enumerate(skf.split(train, train.label)):
    fold_ids[val_] = fold

train["kfold"] = fold_ids

In [12]:
# Load the tokenizer for CFG.model, keep a copy next to the model outputs, and
# expose it on CFG for the dataset / collate code.
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(f"{OUTPUT_MODEL_DIR}tokenizer/")
CFG.tokenizer = tokenizer

# Separator token of this tokenizer (displayed as the cell output).
SEP = tokenizer.sep_token
SEP

'</s>'

In [13]:
class Dataset(torch.utils.data.Dataset):
    """Tokenizes the `inputs` column of a frame on the fly, one sample per row.

    The base class is spelled out in full because this class reuses the name
    `Dataset`; inheriting from the bare name would make a re-run of the cell
    subclass the previous definition of itself.

    Args:
        df: frame with an `inputs` text column and, optionally, a `label` column.
        tokenizer: tokenizer exposing `encode_plus`.
        max_length: truncation length passed to the tokenizer.
    """
    def __init__(self, df, tokenizer, max_length):
        self.df = df
        self.max_len = max_length
        self.text = df['inputs'].values
        self.tokenizer = tokenizer
        # Unlabeled frames (no `label` column) are allowed; they yield no 'target'.
        self.targets = df['label'].values if 'label' in df.columns else None

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Return unpadded `input_ids` / `attention_mask` lists (plus `target`
        and `token_type_ids` when available) for row `index`."""
        text = self.text[index]
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length = self.max_len
        )
        samples = {
            'input_ids': inputs['input_ids'],
            'attention_mask': inputs['attention_mask'],
        }

        if self.targets is not None:
            samples['target'] = self.targets[index]

        if 'token_type_ids' in inputs:
            samples['token_type_ids'] = inputs['token_type_ids']

        return samples

In [14]:
# Dynamic Padding (Collate)
#collate_fn = DataCollatorWithPadding(tokenizer=CFG.tokenizer)
class Collate:
    """Batch collator that pads every sample to the longest sequence in the batch.

    Padding goes on the side named by `tokenizer.padding_side`, using
    `tokenizer.pad_token_id` for ids and 0 for the attention mask. With
    `isTrain=True` the batch also carries a `target` tensor.
    """
    def __init__(self, tokenizer, isTrain=True):
        self.tokenizer = tokenizer
        self.isTrain = isTrain

    def __call__(self, batch):
        id_rows = [sample["input_ids"] for sample in batch]
        mask_rows = [sample["attention_mask"] for sample in batch]
        target_rows = [sample["target"] for sample in batch] if self.isTrain else None

        # longest token sequence in this batch
        longest = max(len(row) for row in id_rows)
        pad_on_right = self.tokenizer.padding_side == "right"

        def pad(row, fill_value):
            filler = (longest - len(row)) * [fill_value]
            return row + filler if pad_on_right else filler + row

        output = dict()
        output["input_ids"] = torch.tensor(
            [pad(row, self.tokenizer.pad_token_id) for row in id_rows], dtype=torch.long
        )
        output["attention_mask"] = torch.tensor(
            [pad(row, 0) for row in mask_rows], dtype=torch.long
        )
        if self.isTrain:
            output["target"] = torch.tensor(target_rows, dtype=torch.long)

        return output
    
# Shared collator instance; isTrain=True makes every batch carry a "target" tensor as well.
collate_fn = Collate(CFG.tokenizer, isTrain=True)

In [15]:
class MeanPooling(nn.Module):
    """Average token embeddings over the sequence axis, counting only masked-in tokens."""
    def __init__(self):
        super().__init__()

    def forward(self, last_hidden_state, attention_mask):
        # Broadcast the mask over the hidden dimension so it can weight each embedding.
        weights = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        weighted_sum = (last_hidden_state * weights).sum(1)
        # Clamp the token count so an all-zero mask does not divide by zero.
        token_counts = weights.sum(1).clamp(min=1e-9)
        return weighted_sum / token_counts

In [16]:
# ====================================================
# Model
# ====================================================
class CustomModel(nn.Module):
    """Pretrained transformer backbone followed by a linear classification head.

    The head maps the hidden state at sequence position 0 to `CFG.target_size`
    scores. Behaviour is steered by `CFG.gradient_checkpointing` and
    `CFG.freezing`.
    """
    def __init__(self, model_name):
        super().__init__()
        self.model = AutoModel.from_pretrained(model_name)
        backbone = self.model

        # Optional gradient checkpointing on the backbone
        if CFG.gradient_checkpointing:
            backbone.gradient_checkpointing_enable()

        # Optional freezing of the embeddings and the first 2 encoder layers
        if CFG.freezing:
            for frozen_part in (backbone.embeddings, backbone.encoder.layer[:2]):
                freeze(frozen_part)
            # Lazy, single-use iterator over the parameters that remain trainable.
            CFG.after_freezed_parameters = filter(lambda parameter: parameter.requires_grad, backbone.parameters())

        self.config = AutoConfig.from_pretrained(model_name)
        self.fc = nn.Linear(self.config.hidden_size, CFG.target_size)

    def forward(self, ids, mask):
        """Return class scores for a batch of token ids and attention masks."""
        backbone_out = self.model(input_ids=ids,
                                  attention_mask=mask,
                                  output_hidden_states=False)
        first_token_state = backbone_out[0][:, 0, :]
        return self.fc(first_token_state)

In [17]:
def asMinutes(s):
    """Format a duration of `s` seconds as "<minutes>m <seconds>s" (both truncated to ints)."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return "%dm %ds" % (whole_minutes, leftover_seconds)

def timeSince(since, percent):
    """Return "<elapsed> (remain <estimate>)" for work started at `since`.

    `percent` is the completed fraction; the remaining time is extrapolated
    linearly from the elapsed time.
    """
    elapsed = time.time() - since
    estimated_total = elapsed / (percent)
    return "%s (remain %s)" % (asMinutes(elapsed), asMinutes(estimated_total - elapsed))

def get_scheduler(cfg, optimizer, num_train_steps):
    """Build the warm-up learning-rate schedule selected by `cfg.scheduler`.

    Args:
        cfg: config exposing `scheduler` ('linear' or 'cosine'),
            `num_warmup_steps` and, for cosine, `num_cycles`.
        optimizer: optimizer whose learning rate is scheduled.
        num_train_steps: total number of scheduler steps.

    Returns:
        The scheduler object created by the matching transformers factory.

    Raises:
        ValueError: if `cfg.scheduler` is not a supported name.
    """
    if cfg.scheduler == 'linear':
        return get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
        )
    if cfg.scheduler == 'cosine':
        return get_cosine_schedule_with_warmup(
            optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
        )
    # Fail with a clear message instead of an UnboundLocalError on the return.
    raise ValueError(f"Unsupported scheduler {cfg.scheduler!r}; expected 'linear' or 'cosine'")

def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    """Run one training pass over `dataloader` and return the mean per-sample loss.

    Gradients are accumulated over `CFG.n_accumulate` batches before each
    optimizer (and scheduler) step; a trailing partial window is applied on the
    last batch so no gradients carry over into the next epoch.

    Args:
        model: callable as `model(ids, mask)` returning class scores.
        optimizer: optimizer updating `model`.
        scheduler: per-step LR scheduler, or None.
        dataloader: yields dicts with 'input_ids', 'attention_mask', 'target'.
        device: device the batch tensors are moved to.
        epoch: zero-based epoch index (used for the progress line only).

    Returns:
        Sample-weighted average of the batch losses (0.0 for an empty loader).
    """
    model.train()

    dataset_size = 0
    running_loss = 0
    epoch_loss = 0.0
    num_steps = len(dataloader)

    start = time.time()

    # Start from clean gradients regardless of what ran before this epoch.
    optimizer.zero_grad()

    for step, data in enumerate(dataloader):
        ids = data['input_ids'].to(device, dtype=torch.long)
        mask = data['attention_mask'].to(device, dtype=torch.long)
        targets = data['target'].to(device, dtype=torch.long)

        batch_size = ids.size(0)

        outputs = model(ids, mask)
        loss = criterion(outputs, targets)

        # Scale only the gradient; the logged loss below stays unscaled.
        (loss / CFG.n_accumulate).backward()

        window_full = (step + 1) % CFG.n_accumulate == 0
        last_batch = (step + 1) == num_steps
        if window_full or last_batch:
            optimizer.step()

            optimizer.zero_grad()
            if scheduler is not None:
                scheduler.step()

        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size

        epoch_loss = running_loss / dataset_size

        if step % CFG.print_freq == 0 or step == (num_steps-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  .format(epoch+1, step, num_steps,
                          remain=timeSince(start, float(step+1)/num_steps)))

    gc.collect()

    return epoch_loss


@torch.no_grad()
def valid_one_epoch(model, dataloader, device, epoch):
    """Evaluate `model` on `dataloader` with gradients disabled.

    Returns:
        A tuple `(epoch_loss, pred)`: the sample-weighted mean loss and the
        model outputs of all batches concatenated into one NumPy array, in
        loader order.
    """
    model.eval()

    started_at = time.time()
    seen_samples = 0
    loss_sum = 0
    batch_outputs = []

    for step, batch in enumerate(dataloader):
        ids = batch['input_ids'].to(device, dtype=torch.long)
        mask = batch['attention_mask'].to(device, dtype=torch.long)
        targets = batch['target'].to(device, dtype=torch.long)

        scores = model(ids, mask)
        batch_loss = criterion(scores, targets)
        batch_outputs.append(scores.to('cpu').numpy())

        n_rows = ids.size(0)
        loss_sum += (batch_loss.item() * n_rows)
        seen_samples += n_rows
        epoch_loss = loss_sum / seen_samples

        is_last_step = step == (len(dataloader)-1)
        if step % CFG.print_freq == 0 or is_last_step:
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  .format(step, len(dataloader),
                          remain=timeSince(started_at, float(step+1)/len(dataloader))))

    pred = np.concatenate(batch_outputs)

    return epoch_loss, pred

In [18]:
def train_loop(fold):
    """Train and validate one cross-validation fold.

    Rows of the global `train` frame with `kfold == fold` form the validation
    set and all other rows the training set. After every epoch the model is
    scored with `get_score`; whenever the score improves, the weights and the
    validation outputs are saved to
    `<OUTPUT_MODEL_DIR>/<model>_fold<fold>_best.pth`.

    Args:
        fold: index of the fold held out for validation.

    Returns:
        The validation frame with four extra columns holding the best epoch's
        raw model outputs, one column per class.

    Raises:
        RuntimeError: if no epoch ran, so there are no predictions to return.
    """
    #wandb.watch(model, log_freq=100)

    LOGGER.info(f'-------------fold:{fold} training-------------')

    train_data = train[train.kfold != fold].reset_index(drop=True)
    valid_data = train[train.kfold == fold].reset_index(drop=True)
    valid_labels = valid_data.label.values

    trainDataset = Dataset(train_data, CFG.tokenizer, CFG.max_len)
    validDataset = Dataset(valid_data, CFG.tokenizer, CFG.max_len)

    train_loader = DataLoader(trainDataset,
                              batch_size = CFG.batch_size,
                              shuffle=True,
                              collate_fn = collate_fn,
                              num_workers = CFG.num_workers,
                              pin_memory = True,
                              drop_last=True)

    valid_loader = DataLoader(validDataset,
                              batch_size = CFG.batch_size*2,
                              shuffle=False,
                              collate_fn = collate_fn,
                              num_workers = CFG.num_workers,
                              pin_memory = True,
                              drop_last=False)

    model = CustomModel(CFG.model)
    torch.save(model.config, OUTPUT_MODEL_DIR+'config.pth')
    model.to(device)
    optimizer = AdamW(model.parameters(), lr=CFG.lr, weight_decay=CFG.weigth_decay)
    # Count the optimizer steps that are really taken: the loader drops the last
    # partial batch and one step covers CFG.n_accumulate batches. Deriving the
    # total from len(train_data) overshoots, so the schedule never finishes.
    steps_per_epoch = math.ceil(len(train_loader) / CFG.n_accumulate)
    num_train_steps = steps_per_epoch * CFG.epochs
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    checkpoint_path = OUTPUT_MODEL_DIR+f"{CFG.model.replace('/', '-')}_fold{fold}_best.pth"

    # loop
    best_score = -math.inf      # the first epoch always becomes the initial best
    best_pred = None

    for epoch in range(CFG.epochs):
        start_time = time.time()

        train_epoch_loss = train_one_epoch(model, optimizer, scheduler, train_loader, device, epoch)
        valid_epoch_loss, pred = valid_one_epoch(model, valid_loader, device, epoch)

        score = get_score(pred, valid_labels)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {train_epoch_loss:.4f}  avg_val_loss: {valid_epoch_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')
        if CFG.wandb:
            # NOTE(review): `wandb` is not imported in any visible cell; confirm
            # before setting CFG.wandb = True.
            wandb.log({f"[fold{fold}] epoch": epoch+1,
                       f"[fold{fold}] avg_train_loss": train_epoch_loss,
                       f"[fold{fold}] avg_val_loss": valid_epoch_loss,
                       f"[fold{fold}] score": score})

        if score > best_score:
            best_score = score
            best_pred = pred
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': pred},
                        checkpoint_path)

    if best_pred is None:
        raise RuntimeError(f"fold {fold}: no epoch was run, so there are no predictions")

    # Use the in-memory copy of the best outputs rather than re-reading the whole
    # checkpoint (which could also be a stale file from an earlier run).
    predictions = best_pred
    valid_data['Data scientist'] = predictions[:, 0]
    valid_data['Machine learning engineer'] = predictions[:, 1]
    valid_data['Software engineer'] = predictions[:, 2]
    valid_data['Consultant'] = predictions[:, 3]


    temp = valid_data[['Data scientist','Machine learning engineer','Software engineer','Consultant']].values.tolist()
    print(get_score(temp, valid_data['label'].values))

    # Drop the references first so the GPU memory can actually be released.
    del model, optimizer, scheduler
    gc.collect()
    torch.cuda.empty_cache()

    return valid_data

In [19]:
if __name__ == '__main__':

    def get_result(oof_df):
        """Log the `get_score` value of the four class-score columns against `label`."""
        labels = oof_df['label'].values
        preds = oof_df[['Data scientist','Machine learning engineer','Software engineer','Consultant']].values.tolist()
        score = get_score(preds, labels)
        LOGGER.info(f'Score: {score:<.4f}')

    if CFG.train:
        # Collect the per-fold frames and concatenate once, instead of growing a
        # DataFrame with repeated pd.concat calls starting from an empty frame.
        fold_frames = []
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(fold)
                fold_frames.append(_oof_df)
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)

        if fold_frames:
            oof_df = pd.concat(fold_frames).reset_index(drop=True)
            LOGGER.info(f"========== CV ==========")
            get_result(oof_df)
            oof_df.to_pickle(OUTPUT_MODEL_DIR+'oof_df.pkl')
            oof_df.to_csv(OUTPUT_MODEL_DIR+f'oof_df.csv', index=False)
        else:
            # No fold in range(CFG.n_fold) is listed in CFG.trn_fold: nothing to score or save.
            oof_df = pd.DataFrame()
            LOGGER.info("No folds selected for training; skipping CV scoring.")

-------------fold:0 training-------------
Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/75] Elapsed 0m 4s (remain 4m 59s) 
Epoch: [1][50/75] Elapsed 0m 58s (remain 0m 27s) 
Epoch: [1][74/75] Elapsed 1m 24s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 1 - avg_train_loss: 1.0727  avg_val_loss: 0.7806  time: 91s
Epoch 1 - Score: 0.6700
Epoch 1 - Save Best Score: 0.6700 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [2][0/75] Elapsed 0m 1s (remain 1m 37s) 
Epoch: [2][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [2][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 2 - avg_train_loss: 0.7215  avg_val_loss: 0.6636  time: 89s
Epoch 2 - Score: 0.6029


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [3][0/75] Elapsed 0m 1s (remain 1m 33s) 
Epoch: [3][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [3][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 3 - avg_train_loss: 0.4991  avg_val_loss: 0.6722  time: 89s
Epoch 3 - Score: 0.7606
Epoch 3 - Save Best Score: 0.7606 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [4][0/75] Elapsed 0m 1s (remain 1m 36s) 
Epoch: [4][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [4][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 4 - avg_train_loss: 0.3890  avg_val_loss: 0.6357  time: 89s
Epoch 4 - Score: 0.7641
Epoch 4 - Save Best Score: 0.7641 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [5][0/75] Elapsed 0m 1s (remain 1m 36s) 
Epoch: [5][50/75] Elapsed 0m 56s (remain 0m 26s) 
Epoch: [5][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 5 - avg_train_loss: 0.1704  avg_val_loss: 0.8764  time: 89s
Epoch 5 - Score: 0.7209


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [6][0/75] Elapsed 0m 1s (remain 1m 31s) 
Epoch: [6][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [6][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 6 - avg_train_loss: 0.0979  avg_val_loss: 0.9879  time: 89s
Epoch 6 - Score: 0.7646
Epoch 6 - Save Best Score: 0.7646 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [7][0/75] Elapsed 0m 1s (remain 1m 38s) 
Epoch: [7][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [7][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 7 - avg_train_loss: 0.0389  avg_val_loss: 0.9160  time: 89s
Epoch 7 - Score: 0.7492


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [8][0/75] Elapsed 0m 1s (remain 1m 32s) 
Epoch: [8][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [8][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 8 - avg_train_loss: 0.0199  avg_val_loss: 0.9954  time: 88s
Epoch 8 - Score: 0.7668
Epoch 8 - Save Best Score: 0.7668 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [9][0/75] Elapsed 0m 1s (remain 1m 32s) 
Epoch: [9][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [9][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 9 - avg_train_loss: 0.0077  avg_val_loss: 0.9713  time: 89s
Epoch 9 - Score: 0.7783
Epoch 9 - Save Best Score: 0.7783 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [10][0/75] Elapsed 0m 1s (remain 1m 33s) 
Epoch: [10][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [10][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 10 - avg_train_loss: 0.0088  avg_val_loss: 0.9721  time: 89s
Epoch 10 - Score: 0.7783


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 


Score: 0.7783
-------------fold:1 training-------------


0.7783145172962802


Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/75] Elapsed 0m 1s (remain 1m 34s) 
Epoch: [1][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [1][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 1 - avg_train_loss: 1.0845  avg_val_loss: 1.2832  time: 89s
Epoch 1 - Score: 0.1184
Epoch 1 - Save Best Score: 0.1184 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [2][0/75] Elapsed 0m 1s (remain 1m 33s) 
Epoch: [2][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [2][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 2 - avg_train_loss: 1.2751  avg_val_loss: 1.2630  time: 88s
Epoch 2 - Score: 0.1184


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [3][0/75] Elapsed 0m 1s (remain 1m 29s) 
Epoch: [3][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [3][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 3 - avg_train_loss: 1.1156  avg_val_loss: 0.8550  time: 88s
Epoch 3 - Score: 0.5413
Epoch 3 - Save Best Score: 0.5413 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [4][0/75] Elapsed 0m 1s (remain 1m 36s) 
Epoch: [4][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [4][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 4 - avg_train_loss: 0.7836  avg_val_loss: 0.7725  time: 89s
Epoch 4 - Score: 0.6392
Epoch 4 - Save Best Score: 0.6392 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [5][0/75] Elapsed 0m 1s (remain 1m 34s) 
Epoch: [5][50/75] Elapsed 0m 56s (remain 0m 26s) 
Epoch: [5][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 5 - avg_train_loss: 0.6053  avg_val_loss: 0.6639  time: 89s
Epoch 5 - Score: 0.6186


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [6][0/75] Elapsed 0m 1s (remain 1m 29s) 
Epoch: [6][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [6][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 6 - avg_train_loss: 0.4645  avg_val_loss: 0.6901  time: 88s
Epoch 6 - Score: 0.6996
Epoch 6 - Save Best Score: 0.6996 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [7][0/75] Elapsed 0m 1s (remain 1m 34s) 
Epoch: [7][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [7][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 7 - avg_train_loss: 0.3209  avg_val_loss: 0.7493  time: 89s
Epoch 7 - Score: 0.6947


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [8][0/75] Elapsed 0m 1s (remain 1m 31s) 
Epoch: [8][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [8][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 8 - avg_train_loss: 0.2433  avg_val_loss: 0.8140  time: 88s
Epoch 8 - Score: 0.7366
Epoch 8 - Save Best Score: 0.7366 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [9][0/75] Elapsed 0m 1s (remain 1m 36s) 
Epoch: [9][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [9][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 9 - avg_train_loss: 0.1929  avg_val_loss: 0.8280  time: 89s
Epoch 9 - Score: 0.7395
Epoch 9 - Save Best Score: 0.7395 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [10][0/75] Elapsed 0m 1s (remain 1m 33s) 
Epoch: [10][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [10][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 10 - avg_train_loss: 0.1675  avg_val_loss: 0.8315  time: 89s
Epoch 10 - Score: 0.7395


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 


Score: 0.7395
-------------fold:2 training-------------


0.739527294335222


Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/75] Elapsed 0m 1s (remain 1m 33s) 
Epoch: [1][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [1][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 1 - avg_train_loss: 1.1550  avg_val_loss: 0.8214  time: 89s
Epoch 1 - Score: 0.5981
Epoch 1 - Save Best Score: 0.5981 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [2][0/75] Elapsed 0m 1s (remain 1m 31s) 
Epoch: [2][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [2][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 2 - avg_train_loss: 0.7765  avg_val_loss: 0.6416  time: 89s
Epoch 2 - Score: 0.6938
Epoch 2 - Save Best Score: 0.6938 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [3][0/75] Elapsed 0m 1s (remain 1m 33s) 
Epoch: [3][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [3][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 3 - avg_train_loss: 0.5383  avg_val_loss: 0.6089  time: 89s
Epoch 3 - Score: 0.6913


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [4][0/75] Elapsed 0m 1s (remain 1m 29s) 
Epoch: [4][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [4][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 4 - avg_train_loss: 0.3828  avg_val_loss: 0.6711  time: 89s
Epoch 4 - Score: 0.6701


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [5][0/75] Elapsed 0m 1s (remain 1m 30s) 
Epoch: [5][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [5][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 5 - avg_train_loss: 0.2253  avg_val_loss: 0.7939  time: 88s
Epoch 5 - Score: 0.6809


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [6][0/75] Elapsed 0m 1s (remain 1m 31s) 
Epoch: [6][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [6][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 6 - avg_train_loss: 0.1296  avg_val_loss: 0.9624  time: 89s
Epoch 6 - Score: 0.6558


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [7][0/75] Elapsed 0m 1s (remain 1m 29s) 
Epoch: [7][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [7][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 7 - avg_train_loss: 0.0461  avg_val_loss: 1.0343  time: 88s
Epoch 7 - Score: 0.6960
Epoch 7 - Save Best Score: 0.6960 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [8][0/75] Elapsed 0m 1s (remain 1m 31s) 
Epoch: [8][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [8][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 8 - avg_train_loss: 0.0283  avg_val_loss: 1.0765  time: 89s
Epoch 8 - Score: 0.7019
Epoch 8 - Save Best Score: 0.7019 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [9][0/75] Elapsed 0m 1s (remain 1m 36s) 
Epoch: [9][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [9][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 9 - avg_train_loss: 0.0196  avg_val_loss: 1.0859  time: 89s
Epoch 9 - Score: 0.7029
Epoch 9 - Save Best Score: 0.7029 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [10][0/75] Elapsed 0m 1s (remain 1m 34s) 
Epoch: [10][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [10][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 10 - avg_train_loss: 0.0124  avg_val_loss: 1.0874  time: 89s
Epoch 10 - Score: 0.6949


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 


Score: 0.7029
-------------fold:3 training-------------


0.7028702187497704


Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/75] Elapsed 0m 1s (remain 1m 33s) 
Epoch: [1][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [1][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 1 - avg_train_loss: 1.0624  avg_val_loss: 0.7862  time: 89s
Epoch 1 - Score: 0.5370
Epoch 1 - Save Best Score: 0.5370 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [2][0/75] Elapsed 0m 1s (remain 1m 32s) 
Epoch: [2][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [2][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 2 - avg_train_loss: 0.7176  avg_val_loss: 0.6613  time: 89s
Epoch 2 - Score: 0.5903
Epoch 2 - Save Best Score: 0.5903 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [3][0/75] Elapsed 0m 1s (remain 1m 32s) 
Epoch: [3][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [3][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 3 - avg_train_loss: 0.5069  avg_val_loss: 0.7043  time: 89s
Epoch 3 - Score: 0.6283
Epoch 3 - Save Best Score: 0.6283 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [4][0/75] Elapsed 0m 1s (remain 1m 34s) 
Epoch: [4][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [4][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 4 - avg_train_loss: 0.3321  avg_val_loss: 0.6825  time: 89s
Epoch 4 - Score: 0.7314
Epoch 4 - Save Best Score: 0.7314 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [5][0/75] Elapsed 0m 1s (remain 1m 33s) 
Epoch: [5][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [5][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 5 - avg_train_loss: 0.1993  avg_val_loss: 0.7662  time: 89s
Epoch 5 - Score: 0.7277


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [6][0/75] Elapsed 0m 1s (remain 1m 29s) 
Epoch: [6][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [6][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 6 - avg_train_loss: 0.1299  avg_val_loss: 0.8867  time: 89s
Epoch 6 - Score: 0.7280


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [7][0/75] Elapsed 0m 1s (remain 1m 30s) 
Epoch: [7][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [7][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 7 - avg_train_loss: 0.0787  avg_val_loss: 0.9010  time: 89s
Epoch 7 - Score: 0.7289


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [8][0/75] Elapsed 0m 1s (remain 1m 30s) 
Epoch: [8][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [8][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 8 - avg_train_loss: 0.0424  avg_val_loss: 0.9468  time: 88s
Epoch 8 - Score: 0.7488
Epoch 8 - Save Best Score: 0.7488 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [9][0/75] Elapsed 0m 1s (remain 1m 36s) 
Epoch: [9][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [9][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 9 - avg_train_loss: 0.0313  avg_val_loss: 0.9446  time: 89s
Epoch 9 - Score: 0.7288


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [10][0/75] Elapsed 0m 1s (remain 1m 29s) 
Epoch: [10][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [10][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 10 - avg_train_loss: 0.0226  avg_val_loss: 0.9510  time: 88s
Epoch 10 - Score: 0.7289


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 


Score: 0.7488
-------------fold:4 training-------------


0.7488026615960828


Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/75] Elapsed 0m 1s (remain 1m 33s) 
Epoch: [1][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [1][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 1 - avg_train_loss: 0.9949  avg_val_loss: 0.8997  time: 89s
Epoch 1 - Score: 0.6319
Epoch 1 - Save Best Score: 0.6319 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [2][0/75] Elapsed 0m 1s (remain 1m 31s) 
Epoch: [2][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [2][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 2 - avg_train_loss: 0.6630  avg_val_loss: 0.6291  time: 89s
Epoch 2 - Score: 0.7084
Epoch 2 - Save Best Score: 0.7084 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [3][0/75] Elapsed 0m 1s (remain 1m 35s) 
Epoch: [3][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [3][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 3 - avg_train_loss: 0.4413  avg_val_loss: 0.6301  time: 89s
Epoch 3 - Score: 0.7034


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [4][0/75] Elapsed 0m 1s (remain 1m 31s) 
Epoch: [4][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [4][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 4 - avg_train_loss: 0.2992  avg_val_loss: 0.8099  time: 88s
Epoch 4 - Score: 0.7178
Epoch 4 - Save Best Score: 0.7178 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [5][0/75] Elapsed 0m 1s (remain 1m 35s) 
Epoch: [5][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [5][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 5 - avg_train_loss: 0.1783  avg_val_loss: 1.0660  time: 89s
Epoch 5 - Score: 0.6299


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [6][0/75] Elapsed 0m 1s (remain 1m 30s) 
Epoch: [6][50/75] Elapsed 0m 55s (remain 0m 25s) 
Epoch: [6][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 6 - avg_train_loss: 0.1145  avg_val_loss: 0.9050  time: 88s
Epoch 6 - Score: 0.7166


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [7][0/75] Elapsed 0m 1s (remain 1m 29s) 
Epoch: [7][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [7][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 7 - avg_train_loss: 0.0384  avg_val_loss: 0.9906  time: 88s
Epoch 7 - Score: 0.7180
Epoch 7 - Save Best Score: 0.7180 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [8][0/75] Elapsed 0m 1s (remain 1m 37s) 
Epoch: [8][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [8][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 8 - avg_train_loss: 0.0202  avg_val_loss: 1.0423  time: 88s
Epoch 8 - Score: 0.7189
Epoch 8 - Save Best Score: 0.7189 Model


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [9][0/75] Elapsed 0m 1s (remain 1m 31s) 
Epoch: [9][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [9][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 9 - avg_train_loss: 0.0150  avg_val_loss: 1.0678  time: 89s
Epoch 9 - Score: 0.7050


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 
Epoch: [10][0/75] Elapsed 0m 1s (remain 1m 28s) 
Epoch: [10][50/75] Elapsed 0m 55s (remain 0m 26s) 
Epoch: [10][74/75] Elapsed 1m 21s (remain 0m 0s) 
EVAL: [0/10] Elapsed 0m 0s (remain 0m 7s) 


Epoch 10 - avg_train_loss: 0.0177  avg_val_loss: 1.0664  time: 88s
Epoch 10 - Score: 0.7050


EVAL: [9/10] Elapsed 0m 6s (remain 0m 0s) 


Score: 0.7189
Score: 0.7385


0.7189104808590102


In [20]:
# Reload the oof_df.csv file saved under the model output directory and preview
# its first rows (presumably the out-of-fold predictions from training -- confirm).
A = pd.read_csv(os.path.join(OUTPUT_MODEL_DIR, 'oof_df.csv'))
A.head()

Unnamed: 0,id,description,label,inputs,kfold,Data scientist,Machine learning engineer,Software engineer,Consultant
0,1,"Designs and develops high quality, scalable a...",2,"Designs and develops high quality, scalable a...",0,-0.970095,-3.277658,6.953153,-2.623476
1,9,Maintain and improve existing predictive model...,0,Maintain and improve existing predictive model...,0,7.207675,-2.065214,-4.686345,-1.795109
2,10,Optimize deep learning frameworks like Tensor...,1,Optimize deep learning frameworks like Tensor...,0,2.325032,5.87636,-3.665878,-4.575485
3,26,Explore and evaluate new ML algorithms to opti...,0,Explore and evaluate new ML algorithms to opti...,0,2.320586,5.986088,-3.544386,-4.260051
4,32,Optimizing our ML model and methods for deter...,1,Optimizing our ML model and methods for deter...,0,2.538621,5.531209,-2.527702,-4.73091
