In [3]:
import numpy as np
import pandas as pd

import os
from glob import glob

import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
from torch.autograd import Variable
from torch.utils.data.sampler import SequentialSampler, RandomSampler
import sklearn

import time
import random
from datetime import datetime
from tqdm import tqdm
tqdm.pandas()
from transformers import AdamW, get_linear_schedule_with_warmup, get_constant_schedule
import transformers
import gc
import re

# !pip install nltk > /dev/null
import nltk
nltk.download('punkt')
from transformers import AutoTokenizer, AutoModel
from nltk import sent_tokenize

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
SEED = 42
BACKBONE_PATH = "roberta-base"


def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(SEED)

In [None]:
df = pd.read_csv('../input/enterpret/train.csv')
df['text'] = df['text'].str.lower()
df['aspect'] = df['aspect'].str.lower()

### [NLP Albumentations](https://www.kaggle.com/shonenkov/nlp-albumentations)

In [None]:
from nltk import sent_tokenize
from random import shuffle
import random
import albumentations
from albumentations.core.transforms_interface import DualTransform, BasicTransform


LANGS = {
    'en': 'english',
}

def get_sentences(text, lang='en'):
    return sent_tokenize(text, LANGS.get(lang, 'english'))

def exclude_duplicate_sentences(text, lang='en'):
    sentences = []
    for sentence in get_sentences(text, lang):
        sentence = sentence.strip()
        if sentence not in sentences:
            sentences.append(sentence)
    return ' '.join(sentences)

def clean_text(text, lang='en'):
    text = str(text)
    text = re.sub(r' r','',text)
    text = exclude_duplicate_sentences(text, lang)
    return text.strip()

In [None]:
def onehot(size, target):
    vec = torch.zeros(size, dtype=torch.float32)
    vec[target] = 1.
    return vec

class DatasetRetriever(Dataset):

    def __init__(self, labels_or_ids, comment_texts, lang = 'en', test=False):
        self.test = test
        self.lang = lang
        self.labels_or_ids = labels_or_ids
        self.comment_texts = comment_texts
        self.tokenizer = AutoTokenizer.from_pretrained("roberta-base",lowercase=True)
        
    def get_tokens(self, text):
        encoded = self.tokenizer.encode_plus(
            text, 
            add_special_tokens=True, 
            max_length=256, 
            pad_to_max_length=True
        )
        return encoded['input_ids'], encoded['attention_mask']

    def __len__(self):
        return self.comment_texts.shape[0]

    def __getitem__(self, idx):
        text = self.comment_texts[idx]
        if self.test is False:
            label = self.labels_or_ids[idx]
            target = onehot(3, label)

        tokens, attention_mask = self.get_tokens(str(text))
        tokens, attention_mask = torch.tensor(tokens), torch.tensor(attention_mask)

        if self.test is False:
            return target, tokens, attention_mask
        return self.labels_or_ids[idx], tokens, attention_mask

    def get_labels(self):
        return list(np.char.add(self.labels_or_ids.astype(str),''))

In [None]:
from sklearn.utils import shuffle
df = shuffle(df)
df = df.reset_index(drop = True)
df

Unnamed: 0,text,aspect,label
0,"missing grouping, inline urls in long text and...",text,0
1,the mobile app is extremely slow and lags behi...,app,0
2,prompt delivery and availability of products.,availability of products.,2
3,"app response is poor, support process gone bad...",app,0
4,"also, go ahead and make widgets for notificati...",macos.,0
...,...,...,...
3995,"and it takes minutes to load just a page, neve...",app,2
3996,this is the first app i’ve ever written a revi...,user-friendly,2
3997,improve quality of vegetables and availability...,vegetables,0
3998,i want to know if my current plan is paid,plan,1


In [None]:
class AucMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.y_true = np.array([0,1,2])
        self.y_pred = np.array([0,1,2])
        self.score = 0

    def update(self, y_true, y_pred):
        y_true = y_true.cpu().numpy().argmax(axis=1)
        y_pred = nn.functional.softmax(y_pred, dim=1).data.cpu().numpy().argmax(axis=1)
        self.score = sklearn.metrics.accuracy_score(y_true, y_pred)
    
    @property
    def acc(self):
        return self.score

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [None]:
class LabelSmoothing(nn.Module):
    def __init__(self, smoothing = 0.01):
        super(LabelSmoothing, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing

    def forward(self, x, target):
        x = x.float()
        target = target.float()
        logprobs = torch.nn.functional.log_softmax(x, dim = -1)
        nll_loss = -logprobs * target
        nll_loss = nll_loss.sum(-1)
        smooth_loss = -logprobs.mean(dim=-1)
        loss = self.confidence * nll_loss + self.smoothing * smooth_loss
        return loss.mean()

In [None]:
import warnings
from tqdm import tqdm
warnings.filterwarnings("ignore")

class Fitter:
    
    def __init__(self, model, device, config, fold):
        if not os.path.exists('node_submissions'):
            os.makedirs('node_submissions')

        self.config = config
        self.epoch = 0
        self.log_path = 'log.txt'
        self.fold = fold
        self.model = model
        self.device = device

        param_optimizer = list(self.model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.001},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]

        self.optimizer = AdamW(optimizer_grouped_parameters, lr=config.lr)
        self.scheduler = config.SchedulerClass(self.optimizer, **config.scheduler_params)

        self.criterion = config.criterion
        print(f'Fitter prepared. Device is {self.device}')

    def fit(self, train_loader, validation_loader):
        for e in range(self.config.n_epochs):
            if self.config.verbose:
                lr = self.optimizer.param_groups[0]['lr']
                timestamp = datetime.utcnow().isoformat()
                print(f'\n{timestamp}\nLR: {lr}')

            t = time.time()
            para_loader = train_loader
            losses, final_acc = self.train_one_epoch(para_loader)
            
            print(f'[RESULT]: Train. Epoch: {self.epoch}, loss: {losses.avg:.5f}, final_score: {final_acc:.5f}, time: {(time.time() - t):.5f}')

            t = time.time()
            para_loader = validation_loader
            losses, final_acc = self.validation(para_loader)

            print(f'[RESULT]: Validation. Epoch: {self.epoch}, loss: {losses.avg:.5f}, final_score: {final_acc:.5f}, time: {(time.time() - t):.5f}')

            if self.config.validation_scheduler:
                self.scheduler.step(metrics=final_acc)

            self.epoch += 1
    
    def run_tuning_and_inference(self, test_loader, validation_tune_loader):
        for e in range(2):
            self.optimizer.param_groups[0]['lr'] = self.config.lr / (e + 1)
            para_loader = validation_tune_loader
            losses, final_scores = self.train_one_epoch(para_loader)
            para_loader = test_loader
            self.run_inference(para_loader)

    def validation(self, val_loader):
        self.model.eval()
        losses = AverageMeter()
        final_scores = AucMeter()
        acc = []
        t = time.time()
        for step, (targets, inputs, attention_masks) in tqdm(enumerate(val_loader)):
            with torch.no_grad():
                inputs = inputs.to(self.device, dtype=torch.long) 
                attention_masks = attention_masks.to(self.device, dtype=torch.long) 
                targets = targets.to(self.device, dtype=torch.float) 

                outputs = self.model(inputs, attention_masks)
                loss = self.criterion(outputs, targets)
                
                batch_size = inputs.size(0)

                final_scores.update(targets, outputs)
                losses.update(loss.detach().item(), batch_size)
                acc.append(final_scores.acc)
                
        print('loss: ' + f'{losses.avg:.5f}, final_score: {np.mean(acc):.5f}, ')
        return losses, np.mean(acc)
         
    def train_one_epoch(self, train_loader):
        self.model.train()

        losses = AverageMeter()
        final_scores = AucMeter()
        t = time.time()
        acc = []
        for step, (targets, inputs, attention_masks) in tqdm(enumerate(train_loader)):   

            inputs = inputs.to(self.device, dtype=torch.long)
            attention_masks = attention_masks.to(self.device, dtype=torch.long)
            targets = targets.to(self.device, dtype=torch.float)

            self.optimizer.zero_grad()

            outputs = self.model(inputs, attention_masks)
            loss = self.criterion(outputs, targets)

            batch_size = inputs.size(0)
            
            final_scores.update(targets, outputs)
            
            losses.update(loss.detach().item(), batch_size)

            loss.backward()
            self.optimizer.step()

            if self.config.step_scheduler:
                self.scheduler.step()
            acc.append(final_scores.acc)
            
        print('loss: ' + f'{losses.avg:.5f}, final_score: {np.mean(acc):.5f}, ')
        self.model.eval()
        self.save('last-checkpoint'+str(self.fold)+'.bin')
        return losses, np.mean(acc)

    def run_inference(self, test_loader):
        #self.model.load_state_dict(torch.load(path))
        self.model.eval()
        result = {'id': [], 'toxic': []}
        t = time.time()
        for step, (ids, inputs, attention_masks) in enumerate(test_loader):
            if self.config.verbose:
                if step % self.config.verbose_step == 0:
                    print(f'Prediction Step {step}, time: {(time.time() - t):.5f}')

            with torch.no_grad():
                inputs = inputs.to(self.device, dtype=torch.long) 
                attention_masks = attention_masks.to(self.device, dtype=torch.long)
                outputs = self.model(inputs, attention_masks)
                toxics = np.argmax(nn.functional.softmax(outputs, dim=1).data.cpu().numpy(), axis = -1)
            result['id'].extend(ids.cpu().numpy())
            result['toxic'].extend(toxics)

        result = pd.DataFrame(result)
        result.to_csv(f'node_submissions/submission.csv', index=False)

    def save(self, path):        
        torch.save(self.model.state_dict(), path)

### Model

In [None]:
class SentSimpleNNModel(nn.Module):

    def __init__(self):
        super(SentSimpleNNModel, self).__init__()
        self.backbone = AutoModel.from_pretrained('roberta-base')
        self.dropout = nn.Dropout(0.3)
        self.linear = nn.Linear(in_features=self.backbone.pooler.dense.out_features,out_features=3)
        
    def forward(self, input_ids, attention_masks):
        seq_x, _= self.backbone(input_ids=input_ids, attention_mask=attention_masks, return_dict=False)
        #apool = torch.mean(seq_x, 1)
        mpool, _ = torch.max(seq_x, 1)
        #x = torch.cat((apool, mpool), 1)
        x = self.dropout(mpool)
        return self.linear(x)

# Custom Config

In [None]:
net = SentSimpleNNModel()

Downloading:   0%|          | 0.00/478M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
df['text'].values[0]

'missing grouping, inline urls in long text and many other features as others have pointed out.\n\n'

In [None]:
class TrainGlobalConfig:
    num_workers = 0 
    batch_size = 32
    n_epochs = 5
    lr = 3e-5

    # -------------------
    verbose = True
    verbose_step = 200
    # -------------------

    # --------------------
    step_scheduler = False  # do scheduler.step after optimizer.step
    validation_scheduler = True  # do scheduler.step after validation stage loss
    SchedulerClass = torch.optim.lr_scheduler.ReduceLROnPlateau
    scheduler_params = dict(
        mode='max',
        factor=0.7,
        patience=0,
        verbose=False, 
        threshold=0.0001,
        threshold_mode='abs',
        cooldown=0, 
        min_lr=1e-8,
        eps=1e-08
    )
    # --------------------

    # -------------------
    criterion = LabelSmoothing()
    # -------------------

### Main method

In [None]:
from sklearn import model_selection

df = pd.read_csv("../input/enterpret/train.csv")
df.columns = ['text','selected_text','label']
df['selected_text'] = df['selected_text'].str.lower()
df['text'] = df['text'].str.lower()
df['aspect'] = df['selected_text'].str.lower()
df['text'] = df['text'] + ' Aspect: ' + df['selected_text']

df = df.dropna().reset_index(drop=True)
df["kfold"] = -1

df = df.sample(frac=1,random_state=SEED).reset_index(drop=True)

kf = model_selection.StratifiedKFold(n_splits=10,random_state=SEED)

for fold, (trn_, val_) in enumerate(kf.split(X=df, y=df.label.values)):
    print(len(trn_), len(val_))
    df.loc[val_, 'kfold'] = fold

3600 400
3600 400
3600 400
3600 400
3600 400
3600 400
3600 400
3600 400
3600 400
3600 400


In [None]:
def run(fold):
    device = 'cuda:0'
    net.to(device)
    from torch.utils.data.distributed import DistributedSampler
    
    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)
    
    train_dataset = DatasetRetriever(
        labels_or_ids=df_train['label'].values, 
        comment_texts=df_train['text'].values, 
        lang='en')
    
    valid_dataset = DatasetRetriever(
        labels_or_ids=df_valid['label'].values, 
        comment_texts=df_valid['text'].values, 
        lang='en')
    
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        pin_memory=False,
        drop_last=True,
        num_workers=TrainGlobalConfig.num_workers,
    )
    
    validation_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        pin_memory=False,
        drop_last=False,
        num_workers=TrainGlobalConfig.num_workers
    )
    
    test_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=TrainGlobalConfig.batch_size,
        pin_memory=False,
        drop_last=False,
        num_workers=TrainGlobalConfig.num_workers
    )
    
    fitter = Fitter(model=net, device=device, config=TrainGlobalConfig, fold = fold)
    fitter.fit(train_loader, validation_loader)
    fitter.run_inference(validation_loader)

In [None]:
run(fold = 0)

Fitter prepared. Device is cuda:0

2021-12-25T22:40:14.787031
LR: 3e-05


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
112it [01:29,  1.24it/s]


loss: 0.88743, final_score: 0.58454, 
[RESULT]: Train. Epoch: 0, loss: 0.88743, final_score: 0.58454, time: 90.99636


13it [00:03,  3.87it/s]


loss: 0.61881, final_score: 0.75000, 
[RESULT]: Validation. Epoch: 0, loss: 0.61881, final_score: 0.75000, time: 3.36537

2021-12-25T22:41:49.150246
LR: 3e-05


112it [01:28,  1.26it/s]


loss: 0.62159, final_score: 0.74833, 
[RESULT]: Train. Epoch: 1, loss: 0.62159, final_score: 0.74833, time: 90.43319


13it [00:03,  3.82it/s]


loss: 0.52757, final_score: 0.77885, 
[RESULT]: Validation. Epoch: 1, loss: 0.52757, final_score: 0.77885, time: 3.40802

2021-12-25T22:43:22.991793
LR: 3e-05


112it [01:29,  1.26it/s]


loss: 0.48442, final_score: 0.81027, 
[RESULT]: Train. Epoch: 2, loss: 0.48442, final_score: 0.81027, time: 90.50808


13it [00:03,  3.88it/s]


loss: 0.54434, final_score: 0.77404, 
[RESULT]: Validation. Epoch: 2, loss: 0.54434, final_score: 0.77404, time: 3.36111

2021-12-25T22:44:56.861485
LR: 2.1e-05


112it [01:29,  1.26it/s]


loss: 0.38314, final_score: 0.85798, 
[RESULT]: Train. Epoch: 3, loss: 0.38314, final_score: 0.85798, time: 90.56351


13it [00:03,  3.85it/s]


loss: 0.65110, final_score: 0.73077, 
[RESULT]: Validation. Epoch: 3, loss: 0.65110, final_score: 0.73077, time: 3.38936

2021-12-25T22:46:30.815689
LR: 1.4699999999999998e-05


112it [01:28,  1.26it/s]


loss: 0.28287, final_score: 0.90011, 
[RESULT]: Train. Epoch: 4, loss: 0.28287, final_score: 0.90011, time: 90.41977


13it [00:03,  3.88it/s]


loss: 0.58447, final_score: 0.77644, 
[RESULT]: Validation. Epoch: 4, loss: 0.58447, final_score: 0.77644, time: 3.36006
Prediction Step 0, time: 0.01431


In [None]:
#df1 = pd.read_csv('./node_submissions/submission.csv')
#w = df[df.kfold == 0]['label']
#sklearn.metrics.accuracy_score(w, df1['toxic'])

In [None]:
run(fold = 1)
#df1 = pd.read_csv('./node_submissions/submission.csv')
#w = df[df.kfold == 1]['label']
#sklearn.metrics.accuracy_score(w, df1['toxic'])

Fitter prepared. Device is cuda:0

2021-12-25T22:48:09.012979
LR: 3e-05


112it [01:28,  1.26it/s]


loss: 0.27978, final_score: 0.90430, 
[RESULT]: Train. Epoch: 0, loss: 0.27978, final_score: 0.90430, time: 89.98971


13it [00:03,  3.85it/s]


loss: 0.18359, final_score: 0.93990, 
[RESULT]: Validation. Epoch: 0, loss: 0.18359, final_score: 0.93990, time: 3.37872

2021-12-25T22:49:42.382555
LR: 3e-05


112it [01:29,  1.26it/s]


loss: 0.22330, final_score: 0.92104, 
[RESULT]: Train. Epoch: 1, loss: 0.22330, final_score: 0.92104, time: 90.40707


13it [00:03,  3.64it/s]


loss: 0.23724, final_score: 0.92548, 
[RESULT]: Validation. Epoch: 1, loss: 0.23724, final_score: 0.92548, time: 3.58974

2021-12-25T22:51:16.379883
LR: 2.1e-05


112it [01:29,  1.26it/s]


loss: 0.16977, final_score: 0.94169, 
[RESULT]: Train. Epoch: 2, loss: 0.16977, final_score: 0.94169, time: 90.57351


13it [00:03,  3.82it/s]


loss: 0.42893, final_score: 0.87740, 
[RESULT]: Validation. Epoch: 2, loss: 0.42893, final_score: 0.87740, time: 3.41670

2021-12-25T22:52:50.371923
LR: 1.4699999999999998e-05


112it [01:29,  1.26it/s]


loss: 0.09888, final_score: 0.96680, 
[RESULT]: Train. Epoch: 3, loss: 0.09888, final_score: 0.96680, time: 90.71413


13it [00:03,  3.76it/s]


loss: 0.29892, final_score: 0.89663, 
[RESULT]: Validation. Epoch: 3, loss: 0.29892, final_score: 0.89663, time: 3.47106

2021-12-25T22:54:24.557687
LR: 1.0289999999999998e-05


112it [01:29,  1.26it/s]


loss: 0.05928, final_score: 0.98214, 
[RESULT]: Train. Epoch: 4, loss: 0.05928, final_score: 0.98214, time: 90.44863


13it [00:03,  3.83it/s]


loss: 0.29007, final_score: 0.91106, 
[RESULT]: Validation. Epoch: 4, loss: 0.29007, final_score: 0.91106, time: 3.40914
Prediction Step 0, time: 0.01440


In [None]:
run(fold = 2)
#df1 = pd.read_csv('./node_submissions/submission.csv')
#w = df[df.kfold == 2]['label']
#sklearn.metrics.accuracy_score(w, df1['toxic'])

Fitter prepared. Device is cuda:0

2021-12-25T22:56:02.763365
LR: 3e-05


112it [01:29,  1.26it/s]


loss: 0.12506, final_score: 0.96177, 
[RESULT]: Train. Epoch: 0, loss: 0.12506, final_score: 0.96177, time: 90.17961


13it [00:03,  3.85it/s]


loss: 0.05640, final_score: 0.98317, 
[RESULT]: Validation. Epoch: 0, loss: 0.05640, final_score: 0.98317, time: 3.38603

2021-12-25T22:57:36.329978
LR: 3e-05


112it [01:29,  1.26it/s]


loss: 0.09685, final_score: 0.96931, 
[RESULT]: Train. Epoch: 1, loss: 0.09685, final_score: 0.96931, time: 90.60908


13it [00:03,  3.77it/s]


loss: 0.08801, final_score: 0.96635, 
[RESULT]: Validation. Epoch: 1, loss: 0.08801, final_score: 0.96635, time: 3.46070

2021-12-25T22:59:10.400706
LR: 2.1e-05


112it [01:29,  1.26it/s]


loss: 0.08553, final_score: 0.97015, 
[RESULT]: Train. Epoch: 2, loss: 0.08553, final_score: 0.97015, time: 90.66221


13it [00:03,  3.81it/s]


loss: 0.11932, final_score: 0.94952, 
[RESULT]: Validation. Epoch: 2, loss: 0.11932, final_score: 0.94952, time: 3.43396

2021-12-25T23:00:44.497828
LR: 1.4699999999999998e-05


112it [01:29,  1.26it/s]


loss: 0.07379, final_score: 0.97517, 
[RESULT]: Train. Epoch: 3, loss: 0.07379, final_score: 0.97517, time: 90.83158


13it [00:03,  3.80it/s]


loss: 0.05814, final_score: 0.98077, 
[RESULT]: Validation. Epoch: 3, loss: 0.05814, final_score: 0.98077, time: 3.43893

2021-12-25T23:02:18.768856
LR: 1.0289999999999998e-05


112it [01:29,  1.26it/s]


loss: 0.03078, final_score: 0.98968, 
[RESULT]: Train. Epoch: 4, loss: 0.03078, final_score: 0.98968, time: 90.52130


13it [00:03,  3.78it/s]


loss: 0.07442, final_score: 0.97356, 
[RESULT]: Validation. Epoch: 4, loss: 0.07442, final_score: 0.97356, time: 3.45444
Prediction Step 0, time: 0.01305


In [None]:
run(fold = 3)
#df1 = pd.read_csv('./node_submissions/submission.csv')
#w = df[df.kfold == 3]['label']
#sklearn.metrics.accuracy_score(w, df1['toxic'])

Fitter prepared. Device is cuda:0

2021-12-25T23:03:57.575505
LR: 3e-05


112it [01:29,  1.26it/s]


loss: 0.07566, final_score: 0.97517, 
[RESULT]: Train. Epoch: 0, loss: 0.07566, final_score: 0.97517, time: 90.14743


13it [00:03,  3.83it/s]


loss: 0.04562, final_score: 0.98558, 
[RESULT]: Validation. Epoch: 0, loss: 0.04562, final_score: 0.98558, time: 3.39937

2021-12-25T23:05:31.122792
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.05508, final_score: 0.98186, 
[RESULT]: Train. Epoch: 1, loss: 0.05508, final_score: 0.98186, time: 90.87156


13it [00:03,  3.84it/s]


loss: 0.07875, final_score: 0.97837, 
[RESULT]: Validation. Epoch: 1, loss: 0.07875, final_score: 0.97837, time: 3.38978

2021-12-25T23:07:05.384656
LR: 2.1e-05


112it [01:29,  1.25it/s]


loss: 0.05322, final_score: 0.98242, 
[RESULT]: Train. Epoch: 2, loss: 0.05322, final_score: 0.98242, time: 90.92719


13it [00:03,  3.84it/s]


loss: 0.05003, final_score: 0.98317, 
[RESULT]: Validation. Epoch: 2, loss: 0.05003, final_score: 0.98317, time: 3.40615

2021-12-25T23:08:39.719010
LR: 1.4699999999999998e-05


112it [01:29,  1.26it/s]


loss: 0.02826, final_score: 0.99079, 
[RESULT]: Train. Epoch: 3, loss: 0.02826, final_score: 0.99079, time: 90.61775


13it [00:03,  3.85it/s]


loss: 0.04480, final_score: 0.99038, 
[RESULT]: Validation. Epoch: 3, loss: 0.04480, final_score: 0.99038, time: 3.39879

2021-12-25T23:10:13.736666
LR: 1.4699999999999998e-05


112it [01:29,  1.25it/s]


loss: 0.02383, final_score: 0.99107, 
[RESULT]: Train. Epoch: 4, loss: 0.02383, final_score: 0.99107, time: 90.83265


13it [00:03,  3.84it/s]


loss: 0.04444, final_score: 0.99038, 
[RESULT]: Validation. Epoch: 4, loss: 0.04444, final_score: 0.99038, time: 3.41058
Prediction Step 0, time: 0.01603


In [None]:
run(fold = 4)
#df1 = pd.read_csv('./node_submissions/submission.csv')
#w = df[df.kfold == 4]['label']
#sklearn.metrics.accuracy_score(w, df1['toxic'])

Fitter prepared. Device is cuda:0

2021-12-25T23:11:53.208780
LR: 3e-05


112it [01:29,  1.26it/s]


loss: 0.06242, final_score: 0.97879, 
[RESULT]: Train. Epoch: 0, loss: 0.06242, final_score: 0.97879, time: 90.21727


13it [00:03,  3.85it/s]


loss: 0.03778, final_score: 0.99279, 
[RESULT]: Validation. Epoch: 0, loss: 0.03778, final_score: 0.99279, time: 3.37997

2021-12-25T23:13:26.806863
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.05615, final_score: 0.98019, 
[RESULT]: Train. Epoch: 1, loss: 0.05615, final_score: 0.98019, time: 90.81979


13it [00:03,  3.85it/s]


loss: 0.05022, final_score: 0.98558, 
[RESULT]: Validation. Epoch: 1, loss: 0.05022, final_score: 0.98558, time: 3.38655

2021-12-25T23:15:01.013754
LR: 2.1e-05


112it [01:29,  1.25it/s]


loss: 0.02968, final_score: 0.98996, 
[RESULT]: Train. Epoch: 2, loss: 0.02968, final_score: 0.98996, time: 91.03920


13it [00:03,  3.83it/s]


loss: 0.04455, final_score: 0.98558, 
[RESULT]: Validation. Epoch: 2, loss: 0.04455, final_score: 0.98558, time: 3.40933

2021-12-25T23:16:35.462841
LR: 1.4699999999999998e-05


112it [01:29,  1.25it/s]


loss: 0.02090, final_score: 0.99191, 
[RESULT]: Train. Epoch: 3, loss: 0.02090, final_score: 0.99191, time: 91.09779


13it [00:03,  3.84it/s]


loss: 0.03563, final_score: 0.99279, 
[RESULT]: Validation. Epoch: 3, loss: 0.03563, final_score: 0.99279, time: 3.41754

2021-12-25T23:18:09.978714
LR: 1.0289999999999998e-05


112it [01:29,  1.25it/s]


loss: 0.01093, final_score: 0.99749, 
[RESULT]: Train. Epoch: 4, loss: 0.01093, final_score: 0.99749, time: 90.96510


13it [00:03,  3.84it/s]


loss: 0.04487, final_score: 0.98798, 
[RESULT]: Validation. Epoch: 4, loss: 0.04487, final_score: 0.98798, time: 3.40563
Prediction Step 0, time: 0.01305


In [None]:
run(fold = 5)
#df1 = pd.read_csv('./node_submissions/submission.csv')
#w = df[df.kfold == 5]['label']
#sklearn.metrics.accuracy_score(w, df1['toxic'])

Fitter prepared. Device is cuda:0

2021-12-25T23:19:50.371875
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.04128, final_score: 0.98521, 
[RESULT]: Train. Epoch: 0, loss: 0.04128, final_score: 0.98521, time: 90.44322


13it [00:03,  3.85it/s]


loss: 0.03005, final_score: 0.99038, 
[RESULT]: Validation. Epoch: 0, loss: 0.03005, final_score: 0.99038, time: 3.38036

2021-12-25T23:21:24.196025
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.03787, final_score: 0.98661, 
[RESULT]: Train. Epoch: 1, loss: 0.03787, final_score: 0.98661, time: 91.38919


13it [00:03,  3.82it/s]


loss: 0.01180, final_score: 0.99279, 
[RESULT]: Validation. Epoch: 1, loss: 0.01180, final_score: 0.99279, time: 3.41510

2021-12-25T23:22:59.001373
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.05640, final_score: 0.98214, 
[RESULT]: Train. Epoch: 2, loss: 0.05640, final_score: 0.98214, time: 91.10538


13it [00:03,  3.85it/s]


loss: 0.00648, final_score: 0.99519, 
[RESULT]: Validation. Epoch: 2, loss: 0.00648, final_score: 0.99519, time: 3.40272

2021-12-25T23:24:33.510567
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.03732, final_score: 0.98772, 
[RESULT]: Train. Epoch: 3, loss: 0.03732, final_score: 0.98772, time: 91.56031


13it [00:03,  3.85it/s]


loss: 0.03115, final_score: 0.98317, 
[RESULT]: Validation. Epoch: 3, loss: 0.03115, final_score: 0.98317, time: 3.38154

2021-12-25T23:26:08.453877
LR: 2.1e-05


112it [01:29,  1.25it/s]


loss: 0.04115, final_score: 0.98744, 
[RESULT]: Train. Epoch: 4, loss: 0.04115, final_score: 0.98744, time: 91.15214


13it [00:03,  3.84it/s]


loss: 0.01927, final_score: 0.99038, 
[RESULT]: Validation. Epoch: 4, loss: 0.01927, final_score: 0.99038, time: 3.39090
Prediction Step 0, time: 0.02103


In [None]:
run(fold = 6)
#df1 = pd.read_csv('./node_submissions/submission.csv')
#w = df[df.kfold == 6]['label']
#sklearn.metrics.accuracy_score(w, df1['toxic'])

Fitter prepared. Device is cuda:0

2021-12-25T23:27:49.209119
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.04056, final_score: 0.98633, 
[RESULT]: Train. Epoch: 0, loss: 0.04056, final_score: 0.98633, time: 90.41740


13it [00:03,  3.82it/s]


loss: 0.00756, final_score: 0.99760, 
[RESULT]: Validation. Epoch: 0, loss: 0.00756, final_score: 0.99760, time: 3.41023

2021-12-25T23:29:23.037368
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.04117, final_score: 0.98633, 
[RESULT]: Train. Epoch: 1, loss: 0.04117, final_score: 0.98633, time: 91.04350


13it [00:03,  3.85it/s]


loss: 0.01352, final_score: 0.99519, 
[RESULT]: Validation. Epoch: 1, loss: 0.01352, final_score: 0.99519, time: 3.38063

2021-12-25T23:30:57.462719
LR: 2.1e-05


112it [01:29,  1.25it/s]


loss: 0.02249, final_score: 0.99163, 
[RESULT]: Train. Epoch: 2, loss: 0.02249, final_score: 0.99163, time: 91.14465


13it [00:03,  3.81it/s]


loss: 0.01908, final_score: 0.99038, 
[RESULT]: Validation. Epoch: 2, loss: 0.01908, final_score: 0.99038, time: 3.42630

2021-12-25T23:32:32.034104
LR: 1.4699999999999998e-05


112it [01:29,  1.25it/s]


loss: 0.01126, final_score: 0.99581, 
[RESULT]: Train. Epoch: 3, loss: 0.01126, final_score: 0.99581, time: 90.93230


13it [00:03,  3.81it/s]


loss: 0.01538, final_score: 0.99519, 
[RESULT]: Validation. Epoch: 3, loss: 0.01538, final_score: 0.99519, time: 3.42387

2021-12-25T23:34:06.392113
LR: 1.0289999999999998e-05


112it [01:29,  1.25it/s]


loss: 0.01104, final_score: 0.99637, 
[RESULT]: Train. Epoch: 4, loss: 0.01104, final_score: 0.99637, time: 91.15274


13it [00:03,  3.85it/s]


loss: 0.01976, final_score: 0.99038, 
[RESULT]: Validation. Epoch: 4, loss: 0.01976, final_score: 0.99038, time: 3.39031
Prediction Step 0, time: 0.01403


In [None]:
run(fold = 7)
#df1 = pd.read_csv('./node_submissions/submission.csv')
#w = df[df.kfold == 7]['label']
#sklearn.metrics.accuracy_score(w, df1['toxic'])

Fitter prepared. Device is cuda:0

2021-12-25T23:35:47.914420
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.04014, final_score: 0.98800, 
[RESULT]: Train. Epoch: 0, loss: 0.04014, final_score: 0.98800, time: 90.49377


13it [00:03,  3.83it/s]


loss: 0.02534, final_score: 0.99038, 
[RESULT]: Validation. Epoch: 0, loss: 0.02534, final_score: 0.99038, time: 3.39847

2021-12-25T23:37:21.807147
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.03616, final_score: 0.98800, 
[RESULT]: Train. Epoch: 1, loss: 0.03616, final_score: 0.98800, time: 91.21638


13it [00:03,  3.82it/s]


loss: 0.01711, final_score: 0.99279, 
[RESULT]: Validation. Epoch: 1, loss: 0.01711, final_score: 0.99279, time: 3.42275

2021-12-25T23:38:56.447279
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.03026, final_score: 0.99107, 
[RESULT]: Train. Epoch: 2, loss: 0.03026, final_score: 0.99107, time: 91.04934


13it [00:03,  3.84it/s]


loss: 0.02370, final_score: 0.99038, 
[RESULT]: Validation. Epoch: 2, loss: 0.02370, final_score: 0.99038, time: 3.39701

2021-12-25T23:40:30.894195
LR: 2.1e-05


112it [01:29,  1.25it/s]


loss: 0.01763, final_score: 0.99470, 
[RESULT]: Train. Epoch: 3, loss: 0.01763, final_score: 0.99470, time: 91.23049


13it [00:03,  3.85it/s]


loss: 0.02475, final_score: 0.99038, 
[RESULT]: Validation. Epoch: 3, loss: 0.02475, final_score: 0.99038, time: 3.40458

2021-12-25T23:42:05.529871
LR: 1.4699999999999998e-05


112it [01:29,  1.25it/s]


loss: 0.01711, final_score: 0.99386, 
[RESULT]: Train. Epoch: 4, loss: 0.01711, final_score: 0.99386, time: 91.41252


13it [00:03,  3.83it/s]


loss: 0.02085, final_score: 0.99279, 
[RESULT]: Validation. Epoch: 4, loss: 0.02085, final_score: 0.99279, time: 3.40593
Prediction Step 0, time: 0.01924


In [None]:
run(fold = 8)
#df1 = pd.read_csv('./node_submissions/submission.csv')
#w = df[df.kfold == 8]['label']
#sklearn.metrics.accuracy_score(w, df1['toxic'])

Fitter prepared. Device is cuda:0

2021-12-25T23:43:47.359836
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.03177, final_score: 0.98884, 
[RESULT]: Train. Epoch: 0, loss: 0.03177, final_score: 0.98884, time: 90.38259


13it [00:03,  3.86it/s]


loss: 0.01222, final_score: 0.99760, 
[RESULT]: Validation. Epoch: 0, loss: 0.01222, final_score: 0.99760, time: 3.37586

2021-12-25T23:45:21.118561
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.03322, final_score: 0.98800, 
[RESULT]: Train. Epoch: 1, loss: 0.03322, final_score: 0.98800, time: 90.88924


13it [00:03,  3.86it/s]


loss: 0.01028, final_score: 0.99760, 
[RESULT]: Validation. Epoch: 1, loss: 0.01028, final_score: 0.99760, time: 3.38395

2021-12-25T23:46:55.392731
LR: 2.1e-05


112it [01:29,  1.25it/s]


loss: 0.02212, final_score: 0.99191, 
[RESULT]: Train. Epoch: 2, loss: 0.02212, final_score: 0.99191, time: 91.14101


13it [00:03,  3.85it/s]


loss: 0.01183, final_score: 0.99760, 
[RESULT]: Validation. Epoch: 2, loss: 0.01183, final_score: 0.99760, time: 3.40154

2021-12-25T23:48:29.936036
LR: 1.4699999999999998e-05


112it [01:29,  1.24it/s]


loss: 0.01238, final_score: 0.99386, 
[RESULT]: Train. Epoch: 3, loss: 0.01238, final_score: 0.99386, time: 91.46546


13it [00:03,  3.85it/s]


loss: 0.01058, final_score: 0.99760, 
[RESULT]: Validation. Epoch: 3, loss: 0.01058, final_score: 0.99760, time: 3.39420

2021-12-25T23:50:04.797005
LR: 1.0289999999999998e-05


112it [01:29,  1.25it/s]


loss: 0.00800, final_score: 0.99526, 
[RESULT]: Train. Epoch: 4, loss: 0.00800, final_score: 0.99526, time: 90.96048


13it [00:03,  3.79it/s]


loss: 0.00940, final_score: 0.99760, 
[RESULT]: Validation. Epoch: 4, loss: 0.00940, final_score: 0.99760, time: 3.44121
Prediction Step 0, time: 0.01922


In [None]:
run(fold = 9)
#df1 = pd.read_csv('./node_submissions/submission.csv')
#w = df[df.kfold == 9]['label']
#sklearn.metrics.accuracy_score(w, df1['toxic'])

Fitter prepared. Device is cuda:0

2021-12-25T23:51:46.936224
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.03258, final_score: 0.98800, 
[RESULT]: Train. Epoch: 0, loss: 0.03258, final_score: 0.98800, time: 90.55224


13it [00:03,  3.86it/s]


loss: 0.10930, final_score: 0.97596, 
[RESULT]: Validation. Epoch: 0, loss: 0.10930, final_score: 0.97596, time: 3.37960

2021-12-25T23:53:20.868583
LR: 3e-05


112it [01:29,  1.25it/s]


loss: 0.02460, final_score: 0.99191, 
[RESULT]: Train. Epoch: 1, loss: 0.02460, final_score: 0.99191, time: 91.61517


13it [00:03,  3.79it/s]


loss: 0.14266, final_score: 0.96635, 
[RESULT]: Validation. Epoch: 1, loss: 0.14266, final_score: 0.96635, time: 3.47046

2021-12-25T23:54:55.955046
LR: 2.1e-05


112it [01:29,  1.25it/s]


loss: 0.01862, final_score: 0.99275, 
[RESULT]: Train. Epoch: 2, loss: 0.01862, final_score: 0.99275, time: 91.11022


13it [00:03,  3.82it/s]


loss: 0.10985, final_score: 0.97596, 
[RESULT]: Validation. Epoch: 2, loss: 0.10985, final_score: 0.97596, time: 3.42110

2021-12-25T23:56:30.487688
LR: 1.4699999999999998e-05


112it [01:29,  1.25it/s]


loss: 0.01095, final_score: 0.99358, 
[RESULT]: Train. Epoch: 3, loss: 0.01095, final_score: 0.99358, time: 91.75323


13it [00:03,  3.78it/s]


loss: 0.11806, final_score: 0.97356, 
[RESULT]: Validation. Epoch: 3, loss: 0.11806, final_score: 0.97356, time: 3.44908

2021-12-25T23:58:05.709905
LR: 1.0289999999999998e-05


112it [01:29,  1.25it/s]


loss: 0.01082, final_score: 0.99554, 
[RESULT]: Train. Epoch: 4, loss: 0.01082, final_score: 0.99554, time: 91.09893


13it [00:03,  3.83it/s]


loss: 0.11597, final_score: 0.97837, 
[RESULT]: Validation. Epoch: 4, loss: 0.11597, final_score: 0.97837, time: 3.40878
Prediction Step 0, time: 0.01313
