In [1]:
# in a terminal run
# > make run-redis NS=train
# > make run-redis NS=test
# to allow access to the train and test namespaces

In [2]:
import os
import sys
import pandas as pd
import numpy as np

In [3]:
sys.path.append("..")
os.environ["USER_PATH"] = "../userdata"

In [4]:
from misc.redis import set_redis_slow_mode
from misc.util import highest_number
from model.datagenerator import create_train_test
from system.namespace.store import get_namespace

In [5]:
import torch

is_cuda = torch.cuda.is_available()
is_cuda

True

In [6]:
set_redis_slow_mode("never")
ns_test = get_namespace("test")
ns_train = get_namespace("train")
now = pd.Timestamp("2022-12-17", tz="UTC")
ttgen = create_train_test(
    train_ns=ns_train,
    train_validation_ns=ns_train,
    test_ns=ns_test,
    batch_size=4 if is_cuda else 8,
    epoch_batches=5000 if is_cuda else 500,
    train_val_size=10000 if is_cuda else 1000,
    test_size=10000 if is_cuda else 1000,
    compute_batch_size=2000 if is_cuda else 100,
    conversation_based=False,
    now=now)

In [7]:
import torch.nn as nn
from transformers import DistilBertTokenizer, DistilBertModel

In [8]:
device = torch.device("cuda") if is_cuda else torch.device("cpu")
device

device(type='cuda')

In [9]:
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

def tokens(texts):
    res = tokenizer(texts.tolist(), return_tensors="pt", padding=True, truncation=True)
    return {k: v.to(device) for k, v in res.items()}

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self._bert_parent = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self._bert_child = DistilBertModel.from_pretrained("distilbert-base-uncased")
        
    def get_parent_embed(self, input_ids, attention_mask):
        outputs_parent = self._bert_parent(input_ids=input_ids, attention_mask=attention_mask)
        return outputs_parent.last_hidden_state[:, 0]
    
    def get_child_embed(self, input_ids, attention_mask):
        outputs_child = self._bert_child(input_ids=input_ids, attention_mask=attention_mask)
        return outputs_child.last_hidden_state[:, 0]
        
    def forward(self, x):
        parent_cls = self.get_parent_embed(input_ids=x["parent"]["input_ids"], attention_mask=x["parent"]["attention_mask"])
        child_cls = self.get_child_embed(input_ids=x["child"]["input_ids"], attention_mask=x["child"]["attention_mask"])
        batch_size = parent_cls.shape[0]
        return torch.bmm(parent_cls.reshape([batch_size, 1, -1]), child_cls.reshape([batch_size, -1, 1])).reshape([-1, 1])
    
class TrainingHarness(nn.Module):
    def __init__(self, model):
        super().__init__()
        self._model = model
        self._softmax = nn.Softmax(dim=1)
        self._loss = nn.BCELoss()
        
    def forward(self, left, right, labels):
        out_left = self._model(left)
        out_right = self._model(right)
        preds = self._softmax(torch.hstack((out_left, out_right)))
        return preds, self._loss(preds, labels)

In [10]:
from torch.optim import AdamW

model = Model()
model.to(device)
harness = TrainingHarness(model)
harness.to(device)

folder = "checkpoints"
postfix = "_lg" if is_cuda else ""
mprev = highest_number(os.listdir(folder), prefix=f"harness{postfix}_", postfix=".pkl")
if mprev is not None:
    prev_fname, prev_epoch = mprev
    harness.load_state_dict(torch.load(os.path.join(folder, prev_fname), map_location=device))
    epoch_offset = prev_epoch + 1
else:
    epoch_offset = 0

optimizer = AdamW(harness.parameters(), lr=5e-5)
mprev, epoch_offset

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.bias']
- T

In [11]:
from transformers import get_scheduler
# from tqdm.notebook import tqdm
from tqdm.auto import tqdm
import evaluate

def compute(df):
    plefts = tokens(df["parent_left"])
    clefts = tokens(df["child_left"])
    prights = tokens(df["parent_right"])
    crights = tokens(df["child_right"])
    labels = torch.tensor([~df["correct_is_right"], df["correct_is_right"]], dtype=torch.float32).T.to(device)
    return harness({"parent": plefts, "child": clefts}, {"parent": prights, "child": crights}, labels)

num_epochs = max((50 if is_cuda else 10) - epoch_offset, 3)
num_training_steps = num_epochs * ttgen.get_epoch_train_size()
lr_scheduler = get_scheduler(
    name="linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps)
ttgen.reset()

for _ in range(num_epochs):
    epoch = ttgen.get_epoch() + epoch_offset
    print(f"epoch {epoch}")
    
    model.train()
    harness.train()
    with tqdm(desc="train", total=ttgen.get_epoch_train_size()) as progress_bar:
        for train_df in ttgen.train_dfs():
            _, loss = compute(train_df)
            loss.backward()

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(train_df.shape[0])

    folder = "checkpoints"
    postfix = "_lg" if is_cuda else ""
    torch.save(harness.state_dict(), os.path.join(folder, f"harness{postfix}_{epoch}.pkl"))
            
    model.eval()
    harness.eval()
    with torch.no_grad():
        metric_train = evaluate.load("accuracy")
        train_loss = []
        with tqdm(desc="train_val", total=ttgen.get_epoch_train_validation_size()) as progress_bar:
            for train_validation_df in ttgen.train_validation_dfs():
                preds, loss = compute(train_validation_df)
                train_loss.append(loss.item())
                predictions = torch.argmax(preds, dim=-1)
                metric_train.add_batch(predictions=predictions, references=train_validation_df["correct_is_right"].astype(int))
                progress_bar.update(train_validation_df.shape[0])
        
        metric_test = evaluate.load("accuracy")
        test_loss = []
        with tqdm(desc="test", total=ttgen.get_epoch_test_size()) as progress_bar:
            for test_df in ttgen.test_dfs():
                preds, loss = compute(test_df)
                test_loss.append(loss.item())
                predictions = torch.argmax(preds, dim=-1)
                metric_test.add_batch(predictions=predictions, references=test_df["correct_is_right"].astype(int))
                progress_bar.update(test_df.shape[0])
        
        print(f"train: {metric_train.compute()} loss: {np.mean(train_loss)}")
        print(f"test: {metric_test.compute()} loss: {np.mean(test_loss)}")
    ttgen.advance_epoch()

epoch 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.6366} loss: 0.6412688046216964
test: {'accuracy': 0.5292} loss: 0.6902655980467797
epoch 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.6669} loss: 0.6366010343886446
test: {'accuracy': 0.5063} loss: 1.1789204594165086
epoch 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.769} loss: 0.42085654439592324
test: {'accuracy': 0.6298} loss: 0.6453790263026953
epoch 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.7921} loss: 0.384285542558413
test: {'accuracy': 0.6133} loss: 0.716933774253726
epoch 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.7962} loss: 0.36132966172944286
test: {'accuracy': 0.6324} loss: 0.6400475922584534
epoch 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.8151} loss: 0.3411347740044817
test: {'accuracy': 0.6412} loss: 0.6658360253214836
epoch 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.8197} loss: 0.339119175026445
test: {'accuracy': 0.5928} loss: 0.7092687988579274
epoch 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.8246} loss: 0.3227710216520354
test: {'accuracy': 0.5981} loss: 0.6681019182562828
epoch 8


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.8273} loss: 0.31881359425224365
test: {'accuracy': 0.6158} loss: 0.6703915956020355
epoch 9


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.8281} loss: 0.3606532036147779
test: {'accuracy': 0.7} loss: 0.7664583751916886
epoch 10


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.817} loss: 0.35585565281267045
test: {'accuracy': 0.6854} loss: 0.5891802461057901
epoch 11


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.8264} loss: 0.33819707686475015
test: {'accuracy': 0.5742} loss: 0.7521654302805663
epoch 12


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.8255} loss: 0.3157909473729902
test: {'accuracy': 0.5807} loss: 0.6732999487996102
epoch 13


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train_val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: {'accuracy': 0.8313} loss: 0.3065201754671056
test: {'accuracy': 0.718} loss: 0.5401540446192026
epoch 14


train:   0%|          | 0/20000 [00:00<?, ?it/s]

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/home/krause/miniconda3/envs/clotho/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3433, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_4827/1429975215.py", line 30, in <module>
    for train_df in ttgen.train_dfs():
  File "/home/krause/workspace/joschi/clotho/notebooks/../model/datagenerator.py", line 412, in train_dfs
    return
  File "/home/krause/workspace/joschi/clotho/notebooks/../model/datagenerator.py", line 412, in <genexpr>
    return
  File "/home/krause/workspace/joschi/clotho/notebooks/../model/datagenerator.py", line 388, in train_batches
    if not is_alive():
  File "/home/krause/workspace/joschi/clotho/notebooks/../model/datagenerator.py", line 356, in next_train_batch
    "sway_right": sway_right,
  File "/home/krause/workspace/joschi/clotho/notebooks/../model/datagenerator.py", line 350, in _get_batch_for
    return {
  File "/home/krause/workspace/joschi/clot

In [None]:
folder = "."
postfix = "_lg" if is_cuda else ""
torch.save(model.state_dict(), os.path.join(folder, f"model{postfix}.pkl"))
torch.save(harness.state_dict(), os.path.join(folder, f"harness{postfix}.pkl"))
torch.save(optimizer.state_dict(), os.path.join(folder, f"optimizer{postfix}.pkl"))