In [1]:
import os
import sys
from importlib import reload
sys.path.append('../src')

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import tqdm

In [2]:
import torch
from torch.utils import data as D

In [3]:
# Local imports
# Make the project sources importable. The membership guard keeps this cell
# idempotent: the first cell already appends the same path, and re-running
# this cell would otherwise stack a duplicate entry onto sys.path each time.
if '../src' not in sys.path:
    sys.path.append('../src')

import dataset
import trainer
import models
import utils

In [4]:
import transformers
from transformers import XLMRobertaModel, XLMRobertaTokenizer, XLMRobertaConfig
from transformers import AdamW, get_linear_schedule_with_warmup, get_constant_schedule

In [5]:
# Expose only GPU index 0 to CUDA for this process.
# NOTE(review): this is only honoured if it is set before the first CUDA call;
# torch is imported in an earlier cell, so this relies on CUDA not having been
# initialised yet at this point.
os.environ.update({'CUDA_VISIBLE_DEVICES': '0'})

In [6]:
# Fix random seeds through the project helper, called with its default arguments.
# NOTE(review): which RNGs are seeded (and with what value) is defined in
# src/utils.py and not visible here — confirm it covers torch/CUDA.
utils.seed_everything()

## Datasets

In [7]:
# Re-import src/dataset.py so edits made on disk are picked up without
# restarting the kernel (objects created before the reload keep the old code).
reload(dataset)

<module 'dataset' from '../src/dataset.py'>

In [8]:
%%time
# Load the validation split from its .npz archive and display the shapes of
# its `x` and `y` arrays (recorded run: x is (8000, 512), y is (8000,)).
# The cell magic above must stay on the first line of the cell.
valid = dataset.Dataset('../input/validation.npz')
valid.x.shape, valid.y.shape

CPU times: user 118 ms, sys: 49.8 ms, total: 168 ms
Wall time: 323 ms


((8000, 512), (8000,))

In [9]:
%%time
# Load the test split from its .npz archive and display the shapes of its
# `x` and `y` arrays (recorded run: x is (63812, 512), y is (63812,)).
# The cell magic above must stay on the first line of the cell.
test = dataset.Dataset('../input/test.npz')
test.x.shape, test.y.shape

CPU times: user 682 ms, sys: 226 ms, total: 908 ms
Wall time: 2.47 s


((63812, 512), (63812,))

In [10]:
%%time
# Load the training split from its .npz archive and display the shapes of its
# `x` and `y` arrays (recorded run: x is (223549, 512), y is (223549,)).
# The cell magic above must stay on the first line of the cell.
train = dataset.Dataset('../input/jigsaw-toxic-comment-train.npz')
train.x.shape, train.y.shape

CPU times: user 1.81 s, sys: 690 ms, total: 2.5 s
Wall time: 3.89 s


((223549, 512), (223549,))

## Data loaders

In [11]:
# Loader settings shared by all three splits.
batch_size = 4
num_workers = 1
loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers)

# Training batches are drawn through the dataset's own weighted sampler;
# validation and test keep the DataLoader's default (sequential) ordering.
loader_train = D.DataLoader(train, sampler=train.weighted_sampler(), **loader_kwargs)
loader_valid = D.DataLoader(valid, **loader_kwargs)
loader_test = D.DataLoader(test, **loader_kwargs)

In [12]:
# Number of batches per loader, in (train, valid, test) order.
tuple(len(loader) for loader in (loader_train, loader_valid, loader_test))

(10692, 2000, 15953)

In [13]:
# Pull a single batch from the training loader for the sanity check further
# down. Each batch unpacks into three items.
# NOTE(review): `am` is presumably the attention mask — confirm in src/dataset.py.
x, y, am = next(iter(loader_train))

## Model

In [14]:
# Re-import src/models.py so edits made on disk are picked up without
# restarting the kernel (objects created before the reload keep the old code).
reload(models)

<module 'models' from '../src/models.py'>

In [15]:
# Build the backbone from the 'xlm-roberta-large' configuration only.
# NOTE(review): constructing the model from a config does not load a
# checkpoint, so the weights are randomly initialised. That is fine for a
# shape/plumbing check; if pretrained weights are intended, the call would be
# XLMRobertaModel.from_pretrained('xlm-roberta-large') — confirm intent.
backbone_config = XLMRobertaConfig.from_pretrained('xlm-roberta-large')
backbone = XLMRobertaModel(backbone_config)

In [16]:
# Wrap the backbone in the project model class, with dropout set to 0.
# NOTE(review): the meaning of `mix=True` is defined in src/models.py and is
# not visible here — confirm before changing it.
model = models.Model(backbone, mix=True, dropout=0)

In [17]:
# Move the model onto the GPU; the sanity-check outputs below are reported on
# device 'cuda:0'.
model = model.cuda()

## Trainer

In [18]:
# Re-import src/trainer.py so edits made on disk are picked up without
# restarting the kernel (objects created before the reload keep the old code).
reload(trainer)

<module 'trainer' from '../src/trainer.py'>

In [19]:
# AdamW over every model parameter with a fixed learning rate of 1e-5; all
# other optimizer settings are left at the library defaults.
optimizer = AdamW(model.parameters(), lr=1e-5)

In [20]:
# Wire the model, the three loaders and the optimizer into the project trainer.
# NOTE(review): 'base' is the first positional argument — presumably a
# run/experiment name; confirm in src/trainer.py.
loaders = (loader_train, loader_valid, loader_test)
trnr = trainer.Trainer('base', model, *loaders, optimizer=optimizer)

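Sanity check: run the single training batch fetched above through the trainer and inspect the raw model output and the loss.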

In [21]:
# Call the trainer directly on the one batch fetched earlier; the call returns
# the model output together with the loss for that batch.
out, loss = trnr(x, y, am)

In [22]:
# Display the raw output: in the recorded run, one row per sample in the batch
# of 4, with two values per row.
out

tensor([[1.0928, 1.2950],
        [0.8992, 1.2395],
        [1.1048, 1.3800],
        [1.1402, 1.3978]], device='cuda:0', grad_fn=<AddmmBackward>)

In [23]:
# Display the loss for the batch: a single scalar tensor that still carries a
# grad_fn, i.e. it is attached to the autograd graph.
loss

tensor(0.6357, device='cuda:0', grad_fn=<MeanBackward0>)