In [1]:
import random

from torch.utils import data
import numpy as np
import torch
from tqdm.notebook import tqdm

from pan20 import auth
from pan20.auth.trans import distbert
from pan20.auth import pytorch
from pan20.util.pytorch import anneal, config, metrics, opt, stopping, training

In [2]:
# Load the dataset through the project helper; the call returns a pair that is
# unpacked into X and truth.
# NOTE(review): presumably X holds the model inputs and truth the matching
# labels, index-aligned (later cells index both with the same indices) — confirm.
X, truth = auth.load_small()

In [3]:
# Carve fixed-size dev and test hold-outs out of the full index range; whatever
# remains becomes the (shuffled) training set.
random.seed(42)  # fixed seed so the split is identical on every run

train_ixs = list(range(len(X)))

# Draw the dev indices, then drop them with a set-membership filter. The filter
# keeps the surviving indices in their original order, exactly as repeated
# list.remove() calls would (every index is unique), but does it in one linear
# pass instead of one full list scan per removed index.
dev_ixs = random.sample(train_ixs, 5000)
dev_set = set(dev_ixs)
train_ixs = [ix for ix in train_ixs if ix not in dev_set]

# The test indices are sampled from what is left, so dev and test are disjoint.
test_ixs = random.sample(train_ixs, 5000)
test_set = set(test_ixs)
train_ixs = [ix for ix in train_ixs if ix not in test_set]

random.shuffle(train_ixs)

In [4]:
# Materialise the three splits by looking each index list up in the full data;
# inputs and labels of a split are built side by side from the same indices.
X_train, truth_train = [X[i] for i in train_ixs], [truth[i] for i in train_ixs]
X_dev, truth_dev = [X[i] for i in dev_ixs], [truth[i] for i in dev_ixs]
X_test, truth_test = [X[i] for i in test_ixs], [truth[i] for i in test_ixs]

In [5]:
# Ask cuDNN for deterministic kernels and switch off its autotuner so repeated
# runs take the same code paths.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed the Python, NumPy and PyTorch random number generators with the same
# value, in that order.
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(42)

In [6]:
# Wrap each (inputs, labels) split in the project's Dataset type, in the order
# train, dev, test.
train, dev, test = (
    pytorch.Dataset(inputs, labels)
    for inputs, labels in (
        (X_train, truth_train),
        (X_dev, truth_dev),
        (X_test, truth_test),
    )
)

In [7]:
# Collate callable handed to the DataLoaders as `collate_fn`.
# NOTE(review): the class name suggests it keeps only the first K items of each
# example — confirm against distbert.CollateFirstK.
collate = distbert.CollateFirstK()

In [8]:
# Experiment configuration: run identity and output locations, followed by the
# optimiser, LR-annealing, early-stopping and training sub-configs.
cfg = config.ExperimentConfig(
    experiment_name='distilbert1',
    ckpt_dir='ckpts/distilbert1',
    results_dir='results/distilbert1',
    optim=opt.AdamWConfig(lr=0.01),
    anneal=anneal.ReduceLROnPlateauConfig(patience=3, factor=0.5),
    stop=stopping.NoDevImprovementConfig(metric='acc', patience=3, k=3),
    train=training.TrainConfig(
        run_no=0,
        seed=42,
        n_epochs=20,
        train_batch_size=32,  # read when building train_loader
        tune_batch_size=64,   # read when building dev_loader
        p_drop=0.1,           # handed to the network constructor
        dev_metric='acc',
        memory_limit=32,
    ),
)

In [9]:
# One DataLoader per split, all sharing the same collate callable. Only the
# training loader shuffles; dev and test keep a fixed order.
train_loader = data.DataLoader(
    batch_size=cfg.train.train_batch_size,
    collate_fn=collate,
    dataset=train,
    shuffle=True)
dev_loader = data.DataLoader(
    batch_size=cfg.train.tune_batch_size,
    collate_fn=collate,
    dataset=dev,
    shuffle=False)
# The evaluation cell calls model.evaluate(test_loader); without this loader a
# fresh-kernel run fails there with NameError. It mirrors the dev loader
# (same batch size, no shuffling) over the held-out test dataset.
test_loader = data.DataLoader(
    batch_size=cfg.train.tune_batch_size,
    collate_fn=collate,
    dataset=test,
    shuffle=False)

In [10]:
# Build the network, passing cfg.train.p_drop as its only constructor argument.
# NOTE(review): presumably this is the dropout probability used inside the
# model — confirm against distbert.DistilBERTComparison1.
net = distbert.DistilBERTComparison1(cfg.train.p_drop)

In [11]:
# Wrap the network together with the experiment config in the project's
# TrainableModel; the resulting object provides the train() and evaluate()
# calls used in the following cells.
model = training.TrainableModel(net, cfg)

In [12]:
# Start training, passing the training loader and the dev loader.
# NOTE(review): the recorded run of this cell ended with "CUDA out of memory"
# on a 5.94 GiB GPU — check GPU headroom and batch sizes before re-running.
model.train(train_loader, dev_loader)

HBox(children=(FloatProgress(value=0.0, description='epoch', max=20.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='iter', max=1332.0, style=ProgressStyle(description_width=…

HBox(children=(FloatProgress(value=0.0, description='tune', max=79.0, style=ProgressStyle(description_width='i…

RuntimeError: CUDA out of memory. Tried to allocate 30.00 MiB (GPU 0; 5.94 GiB total capacity; 3.79 GiB already allocated; 53.38 MiB free; 3.92 GiB reserved in total by PyTorch)

In [None]:
# Evaluate the model on the test loader.
# NOTE(review): test_loader must already have been created by an earlier cell
# (a DataLoader over the `test` dataset); otherwise this raises NameError.
model.evaluate(test_loader)

In [None]:
# Display the `layer_weights` attribute reached through
# model.model.doc_enc.combine_layers (bare expression, so the notebook shows it).
# NOTE(review): presumably these are learned weights for mixing encoder
# layers — confirm.
model.model.doc_enc.combine_layers.layer_weights

In [13]:
# Scratch arithmetic (0.2 of 500); the result is not used by any other cell
# shown here.
500*0.2

100.0