In [1]:
import os
import sys
from importlib import reload

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import tqdm

import torch
from torch.utils import data as D

# Local imports
sys.path.append('../src')
import dataset
import trainer
import models
import utils

# Transformers
import transformers
from transformers import XLMRobertaModel, XLMRobertaTokenizer, XLMRobertaConfig
from transformers import AdamW, get_linear_schedule_with_warmup, get_constant_schedule


os.environ['CUDA_VISIBLE_DEVICES'] = '0'
utils.seed_everything()

## Datasets

In [2]:
reload(dataset)

<module 'dataset' from '../src/dataset.py'>

In [3]:
%%time
valid = dataset.Dataset('../input/validatio_debug_32.npz')
valid.x.shape, valid.y.shape

CPU times: user 4.41 ms, sys: 1.63 ms, total: 6.04 ms
Wall time: 10.1 ms


((32, 512), (32,))

In [4]:
%%time
test = dataset.Dataset('../input/test_debug_32.npz')
test.x.shape, test.y.shape

CPU times: user 3.26 ms, sys: 2.4 ms, total: 5.66 ms
Wall time: 7.63 ms


((32, 512), (32,))

In [5]:
%%time
train = dataset.Dataset('../input/jigsaw-toxic-comment-trai_debug_32.npz')
train.x.shape, train.y.shape

CPU times: user 3.08 ms, sys: 2.12 ms, total: 5.2 ms
Wall time: 7.09 ms


((32, 512), (32,))

## Data loaders

In [6]:
batch_size = 2
num_workers = 4

loader_train = D.DataLoader(train, 
#                             sampler=train.weighted_sampler(), 
                            batch_size=batch_size, num_workers=num_workers)
loader_valid = D.DataLoader(valid, 
                            batch_size=batch_size, num_workers=num_workers)
loader_test = D.DataLoader(test, 
                           batch_size=batch_size, num_workers=num_workers)

In [7]:
len(loader_train), len(loader_valid), len(loader_test)

(16, 16, 16)

In [8]:
x, y, am = next(iter(loader_train))

## Model

In [9]:
reload(models)

<module 'models' from '../src/models.py'>

In [10]:
backbone = XLMRobertaModel(XLMRobertaConfig.from_pretrained('xlm-roberta-large'))

In [11]:
model = models.Model(backbone, mix=False, dropout=0)

## Trainer

In [12]:
reload(trainer)

<module 'trainer' from '../src/trainer.py'>

In [13]:
trnr = trainer.Trainer('base', model, 
                       loader_train, loader_valid, loader_test)

Sanity check for output

In [21]:
out, loss = trnr(x, y, am)

In [22]:
out

tensor([[1.0928, 1.2950],
        [0.8992, 1.2395],
        [1.1048, 1.3800],
        [1.1402, 1.3978]], device='cuda:0', grad_fn=<AddmmBackward>)

In [23]:
loss

tensor(0.6357, device='cuda:0', grad_fn=<MeanBackward0>)

## Training

In [14]:
trnr.fit()

ep. 0000 (lr 1.00e-05): 100%|##########| 16/16 [00:05<00:00,  3.09it/s, loss=0.616, acc=0.781]
valid: 100%|##########| 16/16 [00:01<00:00, 12.60it/s]


Predicted. Avg loss: 1.2786, acc: 0.8125
Saved model to ../checkpoints//base_last.pth


ep. 0001 (lr 1.00e-05): 100%|##########| 16/16 [00:04<00:00,  3.43it/s, loss=0.556, acc=0.906]
valid: 100%|##########| 16/16 [00:01<00:00, 12.51it/s]


Predicted. Avg loss: 0.6815, acc: 0.8125
Saved model to ../checkpoints//base_last.pth


ep. 0002 (lr 1.00e-05): 100%|##########| 16/16 [00:04<00:00,  3.44it/s, loss=0.345, acc=0.906]
valid: 100%|##########| 16/16 [00:01<00:00, 12.59it/s]


Predicted. Avg loss: 0.7587, acc: 0.8125
Saved model to ../checkpoints//base_last.pth


ep. 0003 (lr 1.00e-05): 100%|##########| 16/16 [00:04<00:00,  3.43it/s, loss=0.388, acc=0.906]
valid: 100%|##########| 16/16 [00:01<00:00, 12.52it/s]


Predicted. Avg loss: 0.6942, acc: 0.8125
Saved model to ../checkpoints//base_last.pth


ep. 0004 (lr 1.00e-05): 100%|##########| 16/16 [00:04<00:00,  3.45it/s, loss=0.348, acc=0.906]
valid: 100%|##########| 16/16 [00:01<00:00, 12.60it/s]


Predicted. Avg loss: 0.6914, acc: 0.8125
Saved model to ../checkpoints//base_last.pth


test: 100%|##########| 16/16 [00:01<00:00, 12.44it/s]


(array([[ 2.2840903, -1.2495248],
        [ 2.2881947, -1.2449305],
        [ 2.2844896, -1.2462714],
        [ 2.2839932, -1.2443591],
        [ 2.285874 , -1.2427415],
        [ 2.28372  , -1.2494678],
        [ 2.2848458, -1.2482631],
        [ 2.2830815, -1.2506629],
        [ 2.2853894, -1.2498363],
        [ 2.2852354, -1.242496 ],
        [ 2.2840648, -1.2451131],
        [ 2.2834718, -1.2500204],
        [ 2.284372 , -1.2443455],
        [ 2.2860153, -1.2430453],
        [ 2.2846813, -1.2484543],
        [ 2.287166 , -1.2486053],
        [ 2.281259 , -1.2475924],
        [ 2.2850306, -1.245669 ],
        [ 2.285458 , -1.2476667],
        [ 2.28639  , -1.2436235],
        [ 2.2874494, -1.2456828],
        [ 2.28435  , -1.2477801],
        [ 2.286283 , -1.2438145],
        [ 2.28544  , -1.2503381],
        [ 2.2838805, -1.2522472],
        [ 2.2834644, -1.2434825],
        [ 2.2877247, -1.2451212],
        [ 2.2837992, -1.2435986],
        [ 2.2877417, -1.2442566],
        [ 2.28

## Prediction

In [15]:
pred, loss, acc = trnr.test()

test: 100%|##########| 16/16 [00:01<00:00, 12.31it/s]


In [16]:
acc

1.0