In [1]:
import os
import sys
from importlib import reload
sys.path.append('../src')

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import tqdm

In [16]:
import torch
from torch.utils import data as D

In [4]:
# Local imports
sys.path.append('../src')

import dataset
import trainer
import models

In [23]:
import transformers
from transformers import XLMRobertaModel, XLMRobertaTokenizer, XLMRobertaConfig
from transformers import AdamW, get_linear_schedule_with_warmup, get_constant_schedule

In [22]:
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

## Datasets

In [40]:
reload(dataset)

<module 'dataset' from '../src/dataset.py'>

In [41]:
%%time
valid = dataset.Dataset('../input/validation.npz')
valid.x.shape, valid.y.shape

CPU times: user 135 ms, sys: 47.7 ms, total: 183 ms
Wall time: 228 ms


((8000, 512), (8000,))

In [42]:
%%time
test = dataset.Dataset('../input/test.npz')
test.x.shape, test.y.shape

CPU times: user 678 ms, sys: 216 ms, total: 894 ms
Wall time: 975 ms


((63812, 512), (63812,))

In [43]:
%%time
train = dataset.Dataset('../input/jigsaw-toxic-comment-train.npz')
train.x.shape, train.y.shape

CPU times: user 1.73 s, sys: 700 ms, total: 2.43 s
Wall time: 2.48 s


((223549, 512), (223549,))

## Data loaders

In [44]:
batch_size = 2
num_workers = 16

loader_train = D.DataLoader(train, sampler=train.weighted_sampler(), 
                            batch_size=batch_size, num_workers=num_workers)
loader_valid = D.DataLoader(valid, 
                            batch_size=batch_size, num_workers=num_workers)
loader_test = D.DataLoader(test, 
                           batch_size=batch_size, num_workers=num_workers)

In [45]:
len(loader_train), len(loader_valid), len(loader_test)

(21384, 4000, 31906)

## Model

In [54]:
reload(models)

<module 'models' from '../src/models.py'>

In [24]:
backbone = XLMRobertaModel(XLMRobertaConfig.from_pretrained('xlm-roberta-large'))

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=513.0, style=ProgressStyle(description_…




In [55]:
model = models.Model(backbone, mix=False)

Sanity check for output

In [56]:
x, y, am = next(iter(loader_train))

In [57]:
out, idx, lam = model(x, am)

In [59]:
out

tensor([[-0.8094, -0.0375],
        [-0.0612,  0.7674]], grad_fn=<AddmmBackward>)

In [61]:
idx, lam

(None, None)

## Trainer

In [35]:
reload(trainer)

<module 'trainer' from '../src/trainer.py'>

In [31]:
optimizer = AdamW(model.parameters(), 1e-5)

In [36]:
trnr = trainer.Trainer('base', model, loader_train, loader_valid, loader_test, optimizer=optimizer)