In [1]:
# in a terminal run
# > USER_PATH=/home/krause/userdata/ make run-redis NS=train
# > USER_PATH=/home/krause/userdata/ make run-redis NS=test
# to allow access to the train and test namespaces

In [2]:
import os
import sys
import json
import pandas as pd
import numpy as np
from typing import Literal, TypedDict

In [3]:
# Make the parent directory importable; the project-local imports in the next
# cell (misc, model, system) presumably live there -- confirm the repo layout.
sys.path.append("..")
# Same USER_PATH value as in the `make run-redis` commands noted in the first cell.
os.environ["USER_PATH"] = "/home/krause/userdata/"
# Root folder for the final model files, the validation CSV and the training log.
# NOTE(review): machine-specific absolute paths; adjust before running elsewhere.
MODEL_OUTPUT_BASE = "/mnt/d/workspace/clotho/notebooks"
# Sub-folder for the per-epoch checkpoints and stats files.
MODEL_OUTPUT_CP = os.path.join(MODEL_OUTPUT_BASE, "checkpoints")

In [4]:
from misc.redis import set_redis_slow_mode
from misc.util import highest_number
from misc.io import open_write
from model.datagenerator import create_train_test
from model.transformer_embed import (
    get_epoch_and_load,
    limit_epoch_data,
    limit_epoch_data,
    get_model_filename,
)
from system.namespace.store import get_namespace

In [5]:
import torch

# True when torch can use a CUDA device; later cells use this flag to pick the
# torch device and to scale batch and epoch sizes.
is_cuda = torch.cuda.is_available()
is_cuda

True

In [6]:
# NOTE(review): presumably disables slow-mode handling in the redis helper --
# confirm against misc.redis.
set_redis_slow_mode("never")
# Namespaces backing the test and train data (see the `make run-redis` note in
# the first cell).
ns_test = get_namespace("test")
ns_train = get_namespace("train")
# Fixed UTC timestamp handed to create_train_test as `now`.
now = pd.Timestamp("2022-12-17", tz="UTC")
def _side(mode, flip_pc=0.0):
    """Build the settings dict for one side (left or right) of a task."""
    return {"mode": mode, "flip_pc": flip_pc}


def _base_task(left, right, skip_weak):
    """Build the fields shared by every training and evaluation task."""
    return {
        "left": left,
        "right": right,
        "min_text_length": None,
        "skip_weak": skip_weak,
        "skip_topics": True,
        "flip_lr": 0.5,
    }


def _train_task(left, right, *, skip_weak, first_epoch, weight):
    """Build one training task; `last_epoch` is None for all of them."""
    task = _base_task(left, right, skip_weak)
    task.update(first_epoch=first_epoch, last_epoch=None, weight=weight)
    return task


def _eval_task(left, right, *, weight):
    """Build one evaluation task (no epoch window, weak entries kept)."""
    task = _base_task(left, right, False)
    task["weight"] = weight
    return task


# Training tasks. Each entry gets freshly built dicts, so no sub-dict is shared
# between entries.
# NOTE(review): the first two entries are identical -- confirm the repetition
# is intentional.
train_plan = [
    _train_task(
        _side("valid", 1.0), _side("valid"),
        skip_weak=False, first_epoch=10, weight=50),
    _train_task(
        _side("valid", 1.0), _side("valid"),
        skip_weak=False, first_epoch=10, weight=50),
    _train_task(
        _side("random"), _side("path"),
        skip_weak=True, first_epoch=None, weight=60),
    _train_task(
        None, _side("path"),
        skip_weak=True, first_epoch=None, weight=40),
    _train_task(
        _side("random"), _side("path"),
        skip_weak=False, first_epoch=5, weight=60),
    _train_task(
        None, _side("path"),
        skip_weak=True, first_epoch=5, weight=40),
    _train_task(
        _side("random"), _side("valid"),
        skip_weak=True, first_epoch=None, weight=60),
    _train_task(
        None, _side("valid"),
        skip_weak=True, first_epoch=None, weight=40),
    _train_task(
        _side("valid", 1.0), _side("valid"),
        skip_weak=False, first_epoch=15, weight=50),
]
# Evaluation tasks.
# NOTE(review): entries 2 and 3 repeat entries 0 and 1 -- confirm this is
# intentional.
eval_plan = [
    _eval_task(_side("random"), _side("valid"), weight=60),
    _eval_task(None, _side("valid"), weight=40),
    _eval_task(_side("random"), _side("valid"), weight=60),
    _eval_task(None, _side("valid"), weight=40),
]
# Data generator shared by the training, validation and embedding cells below.
ttgen = create_train_test(
    # the train namespace feeds both the training and the train-validation split
    train_ns=ns_train,
    train_validation_ns=ns_train,
    # the test namespace feeds both the test and the test-validation split
    test_ns=ns_test,
    test_validation_ns=ns_test,
    train_learning_plan=train_plan,
    # the same evaluation plan object is used for all three evaluation splits
    train_val_learning_plan=eval_plan,
    test_learning_plan=eval_plan,
    test_val_learning_plan=eval_plan,
    # sizes depend on whether a CUDA device is available
    batch_size=4 if is_cuda else 8,
    epoch_batches=5000 if is_cuda else 500,
    train_val_size=10000 if is_cuda else 1000,
    test_size=10000 if is_cuda else 1000,
    test_val_size=10000 if is_cuda else 1000,
    # NOTE(review): both branches are 100, so the conditional has no effect
    compute_batch_size=100 if is_cuda else 100,
    now=now)

In [7]:
import torch.nn as nn
from torch.optim import AdamW
from transformers import DistilBertTokenizer, DistilBertModel

In [8]:
# Torch device used for the models and for every tokenized batch.
device = torch.device("cuda") if is_cuda else torch.device("cpu")
device

device(type='cuda')

In [17]:
# Keys of the per-side input dict consumed by the models' forward().
ProviderRole = Literal["child", "parent"]

# Tokenizer shared by every call to tokens().
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
# Width of the dense heads in Model; they are applied to the aggregated encoder
# output, so this has to match the encoder's hidden size.
EMBED_SIZE = 768

# The two tokenizer outputs the models read.
TokenizedInput = TypedDict('TokenizedInput', {
    "input_ids": torch.Tensor,
    "attention_mask": torch.Tensor,
})


# How the hidden states are reduced to one vector per text:
# "cls" takes position 0, "mean" averages over dim 1 (see get_agg).
AggType = Literal["cls", "mean"]
AGG_CLS: AggType = "cls"
AGG_MEAN: AggType = "mean"


def tokens(texts: list[str] | pd.Series) -> TokenizedInput:
    """Tokenize a batch of texts and move the resulting tensors to `device`.

    Args:
        texts: the texts to tokenize, either as a plain list of strings or as
            a pandas Series (the callers in this notebook pass DataFrame
            columns).

    Returns:
        The tokenizer output as a dict of tensors located on `device`. The
        batch is padded and truncated by the tokenizer.
    """
    # A Series offers .tolist(); a plain list does not, so fall back to list().
    text_list = texts.tolist() if hasattr(texts, "tolist") else list(texts)
    res = tokenizer(
        text_list, return_tensors="pt", padding=True, truncation=True)
    return {k: v.to(device) for k, v in res.items()}


class Noise(nn.Module):
    """Adds Gaussian noise to a random subset of elements while training.

    In eval mode the input is returned untouched. In training mode each
    element is selected independently with probability `p` and receives a
    zero-mean Gaussian sample with standard deviation `std`.
    """

    def __init__(self, std: float = 1.0, p: float = 0.5) -> None:
        super().__init__()
        self._std = std
        self._p = p
        # Non-trainable parameter whose only job is to expose the device this
        # module currently lives on.
        self._dhold = nn.Parameter(torch.Tensor([0.0]), requires_grad=False)

    def set_std(self, std: float) -> None:
        """Replace the standard deviation of the noise."""
        self._std = std

    def get_std(self) -> float:
        """Return the current standard deviation of the noise."""
        return self._std

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return `x` with noise added in training mode, `x` itself otherwise."""
        if self.training:
            target = self._dhold.device
            shape = x.shape
            # the mask is drawn before the noise; keep this order so seeded
            # runs stay reproducible
            selected = torch.rand(size=shape, device=target) < self._p
            jitter = torch.normal(
                mean=0.0, std=self._std, size=shape, device=target)
            return x + selected * jitter
        return x


class Model(nn.Module):
    """Scores a (parent, child) text pair using two separate DistilBERT encoders.

    The non-negative `version` selects the optional components:

    * 1, 3, 4, 6: a Linear -> Dropout -> ReLU -> Linear head after each encoder
    * 4, 5: `Noise` applied to both embeddings (active in training mode only)
    * 2, 3, 4: cosine similarity as the score; all other versions use the
      dot product
    * 6 and above: "mean" aggregation of the hidden states; below 6 the first
      position ("cls") is used
    """

    def __init__(self, version: int) -> None:
        super().__init__()
        assert version >= 0
        # Attribute names double as state_dict keys and the creation order
        # fixes the parameter order, so both are kept stable.
        self._bert_parent = DistilBertModel.from_pretrained(
            "distilbert-base-uncased")
        self._bert_child = DistilBertModel.from_pretrained(
            "distilbert-base-uncased")
        with_dense = version in (1, 3, 4, 6)
        self._pdense: nn.Sequential | None = (
            self._dense_head() if with_dense else None)
        self._cdense: nn.Sequential | None = (
            self._dense_head() if with_dense else None)
        self._noise = Noise(std=1.0, p=0.2) if 4 <= version <= 5 else None
        self._cos = torch.nn.CosineSimilarity() if 2 <= version <= 4 else None
        self._agg = AGG_MEAN if version >= 6 else AGG_CLS
        self._version = version

    @staticmethod
    def _dense_head() -> nn.Sequential:
        """Create one dense head of width EMBED_SIZE."""
        return nn.Sequential(
            nn.Linear(EMBED_SIZE, EMBED_SIZE),
            nn.Dropout(p=0.2),
            nn.ReLU(),
            nn.Linear(EMBED_SIZE, EMBED_SIZE))

    def set_epoch(self, epoch: int) -> None:
        """Shrink the noise std to 1 / 1.2**epoch; no-op without noise."""
        if self._noise is not None:
            self._noise.set_std(1 / (1.2 ** epoch))

    def get_version(self) -> int:
        """Return the version this model was built with."""
        return self._version

    def get_agg(self, lhs: torch.Tensor) -> torch.Tensor:
        """Reduce the hidden states `lhs` along dim 1 to one vector per row.

        Raises:
            ValueError: if the stored aggregation mode is not known.
        """
        mode = self._agg
        if mode == AGG_MEAN:
            # NOTE(review): averages over every position of dim 1; the
            # attention mask is not consulted, so padded positions count too.
            return torch.mean(lhs, dim=1)
        if mode == AGG_CLS:
            return lhs[:, 0]
        raise ValueError(f"unknown aggregation: {self._agg}")

    def _encode(
            self,
            bert: nn.Module,
            dense: nn.Sequential | None,
            input_ids: torch.Tensor,
            attention_mask: torch.Tensor) -> torch.Tensor:
        """Run encoder -> aggregation -> optional dense head -> optional noise."""
        encoded = bert(input_ids=input_ids, attention_mask=attention_mask)
        embed = self.get_agg(encoded.last_hidden_state)
        if dense is not None:
            embed = dense(embed)
        noise = self._noise
        return embed if noise is None else noise(embed)

    def get_parent_embed(
            self,
            input_ids: torch.Tensor,
            attention_mask: torch.Tensor) -> torch.Tensor:
        """Embed parent texts with the parent encoder (and parent head)."""
        return self._encode(
            self._bert_parent, self._pdense, input_ids, attention_mask)

    def get_child_embed(
            self,
            input_ids: torch.Tensor,
            attention_mask: torch.Tensor) -> torch.Tensor:
        """Embed child texts with the child encoder (and child head)."""
        return self._encode(
            self._bert_child, self._cdense, input_ids, attention_mask)

    def forward(self, x: dict[ProviderRole, TokenizedInput]) -> torch.Tensor:
        """Score each parent/child pair; the result has one column per row."""
        parent_in = x["parent"]
        parent_vec = self.get_parent_embed(
            input_ids=parent_in["input_ids"],
            attention_mask=parent_in["attention_mask"])
        child_in = x["child"]
        child_vec = self.get_child_embed(
            input_ids=child_in["input_ids"],
            attention_mask=child_in["attention_mask"])
        cos = self._cos
        if cos is not None:
            return cos(parent_vec, child_vec).reshape([-1, 1])
        # row-wise dot product written as a batched (1 x d) @ (d x 1) matmul
        rows = parent_vec.shape[0]
        dots = torch.bmm(
            parent_vec.reshape([rows, 1, -1]),
            child_vec.reshape([rows, -1, 1]))
        return dots.reshape([-1, 1])


class BaselineModel(nn.Module):
    """Baseline scorer: one shared DistilBERT encoder and a plain dot product.

    Only negative versions are accepted. Version -2 uses the first position
    ("cls") of the hidden states; every other negative version uses the mean
    over dim 1.
    """

    def __init__(self, version: int) -> None:
        super().__init__()
        assert version < 0
        self._bert = DistilBertModel.from_pretrained(
            "distilbert-base-uncased")
        self._agg = AGG_CLS if version == -2 else AGG_MEAN
        self._version = version

    def set_epoch(self, epoch: int) -> None:
        """Accepted for interface parity with Model; does nothing."""

    def get_version(self) -> int:
        """Return the version this model was built with."""
        return self._version

    def get_agg(self, lhs: torch.Tensor) -> torch.Tensor:
        """Reduce the hidden states `lhs` along dim 1 to one vector per row.

        Raises:
            ValueError: if the stored aggregation mode is not known.
        """
        mode = self._agg
        if mode == AGG_MEAN:
            return torch.mean(lhs, dim=1)
        if mode == AGG_CLS:
            return lhs[:, 0]
        raise ValueError(f"unknown aggregation: {self._agg}")

    def _embed(
            self,
            input_ids: torch.Tensor,
            attention_mask: torch.Tensor) -> torch.Tensor:
        """Encode with the shared encoder and aggregate the hidden states."""
        encoded = self._bert(
            input_ids=input_ids, attention_mask=attention_mask)
        hidden = encoded.last_hidden_state
        return self.get_agg(hidden)

    def get_parent_embed(
            self,
            input_ids: torch.Tensor,
            attention_mask: torch.Tensor) -> torch.Tensor:
        """Embed parent texts; identical to the child path in this model."""
        return self._embed(input_ids, attention_mask)

    def get_child_embed(
            self,
            input_ids: torch.Tensor,
            attention_mask: torch.Tensor) -> torch.Tensor:
        """Embed child texts; identical to the parent path in this model."""
        return self._embed(input_ids, attention_mask)

    def forward(self, x: dict[ProviderRole, TokenizedInput]) -> torch.Tensor:
        """Return the dot product of each parent/child pair as one column."""
        parent_in = x["parent"]
        parent_vec = self.get_parent_embed(
            input_ids=parent_in["input_ids"],
            attention_mask=parent_in["attention_mask"])
        child_in = x["child"]
        child_vec = self.get_child_embed(
            input_ids=child_in["input_ids"],
            attention_mask=child_in["attention_mask"])
        # row-wise dot product written as a batched (1 x d) @ (d x 1) matmul
        rows = parent_vec.shape[0]
        dots = torch.bmm(
            parent_vec.reshape([rows, 1, -1]),
            child_vec.reshape([rows, -1, 1]))
        return dots.reshape([-1, 1])


# Either of the two scorers; both expose set_epoch, get_version, the embed
# getters and forward.
EitherModel = Model | BaselineModel


class TrainingHarness(nn.Module):
    """Wraps a scorer to train it on "which side is correct" comparisons.

    The wrapped model scores the left and the right (parent, child) pair of
    every row. The two scores are turned into probabilities with a softmax
    and compared against the labels with binary cross entropy.
    """

    def __init__(self, model: EitherModel) -> None:
        super().__init__()
        self._model = model
        self._softmax = nn.Softmax(dim=1)
        self._loss = nn.BCELoss()

    def get_version(self) -> int:
        """Return the version of the wrapped model."""
        return self._model.get_version()

    def forward(
            self,
            left: dict[ProviderRole, TokenizedInput],
            right: dict[ProviderRole, TokenizedInput],
            labels: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Score both sides and compute the loss.

        Args:
            left: tokenized "parent" and "child" inputs of the left pair. The
                dict is handed to the wrapped model unchanged.
            right: the same for the right pair.
            labels: target probabilities with one column per side
                (left first, right second).

        Returns:
            A tuple of the softmax probabilities (left column first) and the
            binary cross entropy loss against `labels`.
        """
        # left is scored before right; keep the order so stochastic layers
        # consume random numbers in the same sequence
        out_left = self._model(left)
        out_right = self._model(right)
        preds = self._softmax(torch.hstack((out_left, out_right)))
        return preds, self._loss(preds, labels)

In [18]:
from transformers import get_scheduler
# from tqdm.notebook import tqdm
from tqdm.auto import tqdm
import evaluate
import time


def create_model(version: int) -> EitherModel:
    """Build the scorer for `version`: Model for 0 and above, else BaselineModel."""
    if version >= 0:
        return Model(version)
    return BaselineModel(version)


def compute(harness: nn.Module, df: pd.DataFrame):
    """Run the harness on one batch DataFrame.

    Args:
        harness: the training harness; called with `left`, `right` and
            `labels` keyword arguments.
        df: batch with the text columns "parent_left", "child_left",
            "parent_right", "child_right" and the column "correct_is_right".

    Returns:
        Whatever the harness returns (predictions and loss for
        TrainingHarness).
    """
    plefts = tokens(df["parent_left"])
    clefts = tokens(df["child_left"])
    prights = tokens(df["parent_right"])
    crights = tokens(df["child_right"])
    # Coerce to bool first: `~` on a 0/1 integer column is a bitwise NOT
    # (-1 / -2) and would yield invalid targets.
    is_right = torch.tensor(df["correct_is_right"].to_numpy(dtype=bool))
    # column 0: left is correct, column 1: right is correct
    labels = torch.stack((~is_right, is_right), dim=1).to(
        dtype=torch.float32, device=device)
    return harness(
        left={"parent": plefts, "child": clefts},
        right={"parent": prights, "child": crights},
        labels=labels)


def _eval_pass(harness, batches, *, desc, total):
    """Score `harness` on every DataFrame in `batches`; return (accuracy, mean loss)."""
    metric = evaluate.load("accuracy")
    losses = []
    with tqdm(desc=desc, total=total) as progress_bar:
        for batch_df in batches:
            preds, loss = compute(harness, batch_df)
            losses.append(loss.item())
            metric.add_batch(
                predictions=torch.argmax(preds, dim=-1),
                references=batch_df["correct_is_right"].astype(int))
            progress_bar.update(batch_df.shape[0])
    return float(metric.compute()["accuracy"]), float(np.mean(losses))


def run_training(num_epochs: int, version: int, force_restart: bool):
    """Train one model version, checkpointing and logging after every epoch.

    Args:
        num_epochs: total number of epochs the model should have seen; the
            epoch offset returned by get_epoch_and_load is subtracted, so a
            resumed run only trains the remaining epochs.
        version: model version passed to create_model.
        force_restart: forwarded to get_epoch_and_load.

    Returns:
        The tuple (model, harness, optimizer). The optimizer is always newly
        created here; no optimizer state is loaded.

    Side effects per epoch: saves the harness state dict and a stats JSON
    under MODEL_OUTPUT_CP, appends one row to the validation log CSV under
    MODEL_OUTPUT_BASE, and calls limit_epoch_data for the stats files.
    """
    model = create_model(version)
    model.to(device)
    harness = TrainingHarness(model)
    harness.to(device)

    mprev, epoch_offset = get_epoch_and_load(
        harness,
        MODEL_OUTPUT_CP,
        ftype="harness",
        is_cuda=is_cuda,
        device=device,
        force_restart=force_restart)

    optimizer = AdamW(harness.parameters(), lr=5e-5)
    print(mprev, epoch_offset)

    num_epochs -= epoch_offset
    if num_epochs <= 0:
        print("already computed all epochs. nothing to do!")
        return model, harness, optimizer

    # NOTE(review): the progress bar below advances by rows per batch against
    # this same size, while the scheduler steps once per batch -- so this
    # count looks like rows, not optimizer steps. Confirm the unit.
    num_training_steps = num_epochs * ttgen.get_epoch_train_size()
    warmup = 10000 if is_cuda else 10
    # num_training_steps is the TOTAL step count including warmup; subtracting
    # the warmup made the learning rate hit zero `warmup` steps too early.
    lr_scheduler = get_scheduler(
        name="linear",
        optimizer=optimizer,
        num_warmup_steps=warmup,
        num_training_steps=num_training_steps)
    ttgen.set_epoch(epoch_offset)

    log_csv = get_model_filename(
        harness,
        MODEL_OUTPUT_BASE,
        is_cuda=is_cuda,
        ftype="val_log",
        epoch=None,
        ext=".csv")
    columns = [
        "epoch",
        "train_acc",
        "train_loss",
        "train_val_acc",
        "train_val_loss",
        "test_acc",
        "test_loss",
        "time",
        "version",
        "fname",
    ]
    # write the header once; later rows are appended without a header
    if not os.path.exists(log_csv):
        pd.DataFrame([], columns=columns).to_csv(
            log_csv, header=True, mode="w", columns=columns)

    for _ in range(num_epochs):
        epoch = ttgen.get_epoch()
        print(f"epoch {epoch} version: {harness.get_version()}")
        real_time = time.monotonic()

        # --- training pass ---
        model.train()
        harness.train()
        model.set_epoch(epoch)
        metric_train = evaluate.load("accuracy")
        train_loss = []
        with tqdm(desc="train", total=ttgen.get_epoch_train_size()) as progress_bar:
            for train_df in ttgen.train_dfs():
                preds, loss = compute(harness, train_df)
                train_loss.append(loss.item())
                loss.backward()

                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()
                progress_bar.update(train_df.shape[0])

                predictions = torch.argmax(preds, dim=-1)
                metric_train.add_batch(
                    predictions=predictions,
                    references=train_df["correct_is_right"].astype(int))

        model_fname = get_model_filename(
            harness,
            MODEL_OUTPUT_CP,
            is_cuda=is_cuda,
            ftype="harness",
            epoch=epoch)
        torch.save(harness.state_dict(), model_fname)

        # --- evaluation passes ---
        model.eval()
        harness.eval()
        with torch.no_grad():
            train_val_total = ttgen.get_epoch_train_validation_size()
            train_val_acc, train_val_loss = _eval_pass(
                harness,
                ttgen.train_validation_dfs(),
                desc="train val",
                total=train_val_total)
            test_total = ttgen.get_epoch_test_size()
            test_acc, test_loss = _eval_pass(
                harness,
                ttgen.test_dfs(),
                desc="test",
                total=test_total)
        stats = {
            "epoch": int(epoch),
            "train_acc": float(metric_train.compute()['accuracy']),
            "train_loss": float(np.mean(train_loss)),
            "train_val_acc": train_val_acc,
            "train_val_loss": train_val_loss,
            "test_acc": test_acc,
            "test_loss": test_loss,
            "time": 0.0,
            "version": harness.get_version(),
            "fname": model_fname,
        }

        print(f"train: {stats['train_acc']} loss: {stats['train_loss']}")
        print(f"train val: {stats['train_val_acc']} loss: {stats['train_val_loss']}")
        print(f"test: {stats['test_acc']} loss: {stats['test_loss']}")
        ttgen.advance_epoch()
        stats["time"] = float((time.monotonic() - real_time) / 60.0)
        print(f"epoch time: {stats['time']:.2f}min")
        stats_fn = get_model_filename(
            harness,
            MODEL_OUTPUT_CP,
            is_cuda=is_cuda,
            ftype="stats",
            epoch=epoch,
            ext=".json")
        with open_write(stats_fn, text=True) as fout:
            print(json.dumps(stats, indent=2, sort_keys=True), file=fout)
        stats_df = pd.DataFrame(
            {key: [val] for key, val in stats.items()},
            columns=columns)
        stats_df.to_csv(
            log_csv, header=False, mode="a")

        # NOTE(review): only the stats files are named here; confirm whether
        # the per-epoch harness checkpoints saved above get pruned as well.
        limit_epoch_data(
            harness,
            MODEL_OUTPUT_CP,
            is_cuda=is_cuda,
            ftype="stats",
            ext=".json",
            count=5)
    return model, harness, optimizer

In [19]:
def save_model(model, harness, optimizer):
    """Save the final state dicts of model, harness and optimizer.

    Each state dict goes to the file named by get_model_filename under
    MODEL_OUTPUT_BASE, with ftype "model", "harness" and "optimizer"
    respectively and no epoch.
    """
    artifacts = (
        ("model", model),
        ("harness", harness),
        ("optimizer", optimizer),
    )
    for ftype, obj in artifacts:
        state = obj.state_dict()
        target = get_model_filename(
            harness,
            MODEL_OUTPUT_BASE,
            is_cuda=is_cuda,
            ftype=ftype,
            epoch=None)
        torch.save(state, target)

In [20]:
def validation(model, harness):
    """Evaluate on the test-validation split, save the rows, show samples.

    Resets the data generator, scores every test-validation batch without
    gradients, prints accuracy and mean loss, writes all rows with their
    predictions to the "validation" CSV under MODEL_OUTPUT_BASE, and displays
    the first correctly and incorrectly predicted rows.

    The "logit_left" / "logit_right" columns hold the values returned by the
    harness, which for TrainingHarness are softmax probabilities. The column
    names are kept because they are part of the written CSV.
    """
    ttgen.reset()
    model.eval()
    harness.eval()
    dfs = []
    with torch.no_grad():
        metric_val_test = evaluate.load("accuracy")
        test_val_loss = []
        with tqdm(desc="test val", total=ttgen.get_epoch_test_validation_size()) as progress_bar:
            for test_val_df in ttgen.test_validation_dfs():
                preds, loss = compute(harness, test_val_df)
                test_val_loss.append(loss.item())
                predictions = torch.argmax(preds, dim=-1)
                metric_val_test.add_batch(
                    predictions=predictions,
                    references=test_val_df["correct_is_right"].astype(int))
                # Convert to NumPy explicitly so the columns hold plain
                # numeric arrays instead of relying on pandas coercing
                # torch tensors.
                scores = preds.cpu().numpy()
                cur_df = test_val_df.copy()
                cur_df["logit_left"] = scores[:, 0]
                cur_df["logit_right"] = scores[:, 1]
                cur_df["preds"] = predictions.cpu().numpy()
                cur_df["truth"] = test_val_df["correct_is_right"].astype(int)
                dfs.append(cur_df)
                progress_bar.update(test_val_df.shape[0])
    print(f"test val: {metric_val_test.compute()} loss: {np.mean(test_val_loss)}")
    validation_df = pd.concat(dfs)
    validation_df.to_csv(get_model_filename(
        harness,
        MODEL_OUTPUT_BASE,
        is_cuda=is_cuda,
        ftype="validation",
        epoch=None,
        ext=".csv"))
    print("correct")
    display(validation_df[validation_df["preds"] == validation_df["truth"]].head())
    print("incorrect")
    display(validation_df[validation_df["preds"] != validation_df["truth"]].head())

In [21]:
def embeds(model, max_batches: int = 5):
    """Display the child embeddings of the first test-validation batches.

    Args:
        model: scorer exposing get_child_embed(input_ids, attention_mask).
        max_batches: number of batches to show; defaults to 5. Nothing is
            shown for values below 1.

    For every shown batch the embeddings of "child_left" and then
    "child_right" are displayed as NumPy arrays. Only the child columns are
    tokenized; the parent texts are not needed here.
    """
    ttgen.reset()
    model.eval()
    if max_batches < 1:
        return
    with torch.no_grad():
        for batch_no, test_val_df in enumerate(
                ttgen.test_validation_dfs(), start=1):
            for column in ("child_left", "child_right"):
                toks = tokens(test_val_df[column])
                display(model.get_child_embed(
                    toks["input_ids"],
                    toks["attention_mask"]).cpu().numpy())
            # stop right after the last wanted batch so no extra batch is
            # pulled from the generator
            if batch_no >= max_batches:
                break

In [22]:
def full_run(*, num_epochs, version, force_restart):
    """Train a model version, save it, then run validation and show embeddings."""
    trained = run_training(num_epochs, version, force_restart)
    model, harness, _ = trained
    save_model(*trained)
    validation(model, harness)
    embeds(model)

In [23]:
# for version in range(8):
#     full_run(num_epochs=30, version=version, force_restart=False)

In [24]:
# Train the baseline model (version -1, mean aggregation) until it has seen 30
# epochs in total; epochs reported by get_epoch_and_load are not repeated.
full_run(num_epochs=30, version=-1, force_restart=False)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


None 0
epoch 0 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.56085 loss: 1.38945310176434
train val: 0.5646 loss: 0.9130380945429206
test: 0.577 loss: 0.9443815663428977
epoch time: 97.83min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_0.pkl
best train: 0.56085
best train val: 0.5646
best test: 0.577
epoch 1 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.61815 loss: 0.7195573684424162
train val: 0.6458 loss: 0.6127327186256647
test: 0.597 loss: 0.7366132311806083
epoch time: 97.07min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_1.pkl
best train: 0.61815
best train val: 0.6458
best test: 0.597
epoch 2 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.70825 loss: 0.4987365668807179
train val: 0.7209 loss: 0.48274152010558175
test: 0.6563 loss: 0.6201377160090953
epoch time: 98.35min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_2.pkl
best train: 0.70825
best train val: 0.7209
best test: 0.6563
epoch 3 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7384 loss: 0.4419358304378111
train val: 0.7286 loss: 0.47039060950055717
test: 0.6355 loss: 0.6300585457876324
epoch time: 98.31min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 4 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.73115 loss: 0.45466241342632563
train val: 0.7285 loss: 0.4810362111664377
test: 0.6395 loss: 0.6366368094600737
epoch time: 95.29min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 5 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7381 loss: 0.44120950333508663
train val: 0.7187 loss: 0.4886403533383273
test: 0.6321 loss: 0.6491294624179602
epoch time: 79.75min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 6 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7117 loss: 0.5119664853511379
train val: 0.6149 loss: 0.6290137491881848
test: 0.5461 loss: 0.697455470609665
epoch time: 79.19min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_0.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_0.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 7 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7091 loss: 0.4863916858772747
train val: 0.6884 loss: 0.5252088767588139
test: 0.597 loss: 0.6791111347079277
epoch time: 11.08min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_6.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_6.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 8 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7504 loss: 0.41474713635820665
train val: 0.7262 loss: 0.4749997805494815
test: 0.633 loss: 0.6372083501785993
epoch time: 10.15min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_1.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_1.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 9 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.75555 loss: 0.39908629610524515
train val: 0.725 loss: 0.4691934071943164
test: 0.6395 loss: 0.5962330000519752
epoch time: 9.92min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_7.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_7.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 10 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6656 loss: 0.5723137661572546
train val: 0.6997 loss: 0.5801102249622345
test: 0.6081 loss: 0.6512970570802689
epoch time: 11.60min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_5.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_5.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 11 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.663 loss: 0.6155915836295157
train val: 0.7101 loss: 0.5370593824446201
test: 0.6272 loss: 0.6203643132865428
epoch time: 10.27min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_10.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_10.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 12 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.66775 loss: 0.5608567715289071
train val: 0.6934 loss: 0.5418759477436542
test: 0.6173 loss: 0.6326223272621632
epoch time: 79.69min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_11.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_11.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 13 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.66215 loss: 0.565260159339942
train val: 0.6734 loss: 0.562143769866228
test: 0.6155 loss: 0.6252480883657933
epoch time: 77.96min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_12.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_12.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 14 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6664 loss: 0.5638407687030733
train val: 0.6989 loss: 0.5676623407661915
test: 0.6025 loss: 0.6452380327701569
epoch time: 78.82min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_13.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_13.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 15 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6401 loss: 0.5911871332839131
train val: 0.7001 loss: 0.5642062939286232
test: 0.6039 loss: 0.6432137469649315
epoch time: 80.10min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_14.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_14.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 16 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6388 loss: 0.5896157197862864
train val: 0.6988 loss: 0.5453220775365829
test: 0.6051 loss: 0.6405135773897171
epoch time: 80.28min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_15.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_15.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 17 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6415 loss: 0.589526033899188
train val: 0.6637 loss: 0.5781770874798298
test: 0.5754 loss: 0.6645675924062728
epoch time: 80.26min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_16.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_16.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 18 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6386 loss: 0.5918147217854858
train val: 0.7052 loss: 0.5636243393719196
test: 0.6051 loss: 0.6511908037781715
epoch time: 80.36min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_17.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_17.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 19 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6397 loss: 0.6075041025727987
train val: 0.7032 loss: 0.599556890130043
test: 0.6009 loss: 0.6647529211878777
epoch time: 79.85min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_18.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_18.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 20 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6476 loss: 0.5785623337294906
train val: 0.7012 loss: 0.5731244492650032
test: 0.6033 loss: 0.6502480636596679
epoch time: 81.01min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_19.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_19.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 21 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6453 loss: 0.5845880770996214
train val: 0.6853 loss: 0.5851933311223984
test: 0.5984 loss: 0.653357640016079
epoch time: 84.79min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_20.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_20.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 22 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.64215 loss: 0.9563763603620231
train val: 0.6743 loss: 0.5771103532195091
test: 0.5755 loss: 0.6673170557260514
epoch time: 88.16min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_21.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_21.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 23 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6439 loss: 0.5864277446314693
train val: 0.7052 loss: 0.5517686339020729
test: 0.5958 loss: 0.6454229927241802
epoch time: 89.84min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_22.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_22.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 24 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.63385 loss: 0.6004580841161311
train val: 0.6254 loss: 0.6312077641010284
test: 0.5879 loss: 0.6607893728017807
epoch time: 88.07min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_23.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_23.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 25 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.63735 loss: 0.590343584985286
train val: 0.685 loss: 0.5762076863110065
test: 0.569 loss: 0.6720563063383103
epoch time: 89.60min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_24.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_24.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 26 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6356 loss: 0.5845916539721191
train val: 0.6924 loss: 0.5747366861224175
test: 0.59 loss: 0.6599507623076439
epoch time: 88.57min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_25.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_25.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 27 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.64495 loss: 0.5836259418576956
train val: 0.6952 loss: 0.5505485960066319
test: 0.6006 loss: 0.6462153318285943
epoch time: 89.83min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_26.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_26.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 28 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.641 loss: 0.581787889508158
train val: 0.6963 loss: 0.5720851927399635
test: 0.5898 loss: 0.6549621745228767
epoch time: 89.59min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_27.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_27.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355
epoch 29 version: -1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.639 loss: 0.5855579840373248
train val: 0.693 loss: 0.5727791177213192
test: 0.5958 loss: 0.6543426186203957
epoch time: 88.88min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v-1_lg_28.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_28.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v-1_lg_3.pkl
best train: 0.7384
best train val: 0.7286
best test: 0.6355


test val:   0%|          | 0/10000 [00:00<?, ?it/s]

test val: {'accuracy': 0.5956} loss: 0.6514595863461494
correct


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
1,*valid--!copy;(st),"How do men feel about the ""men will fuck anyth...","Not really positive, I’d need to fuck it first.","Not really positive, I’d need to fuck it first.","How do men feel about the ""men will fuck anyth...",0.880797,0.119203,False,0.502861,0.497139,0,0
0,random--valid;(st),Once is better than twice,"Going too fast and coming out, hitting the buf...",What is 'hands down' the best Ice-cream flavour?,Pistacchio,0.0,1.0,True,0.351436,0.648564,1,1
0,*valid--random;(st),What’s your controversial food opinion?,"I like chocolate candy enough, but i'll usuall...","""consent"" is the lack of disagreement. By defi...","Yeah, I agree with you 100%. The sad part is t...",0.731059,0.268941,False,0.658205,0.341795,0,0
1,*valid--!copy;(st),You're trapped inside the last video game you ...,Death Stranding.....god help me,Death Stranding.....god help me,You're trapped inside the last video game you ...,0.997527,0.002473,False,0.500435,0.499565,0,0
2,*valid--random;(st),"Hypothetically, if you \_absolute\_ had to, ho...",A wet cloth over your nose and mouth may help ...,A snickers,My best friend and I are both deeply in love. ...,0.999665,0.000335,False,0.632183,0.367816,0,0


incorrect


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
0,!copy--valid;(st),Those pits with the spikes at the bottom,What's one of life's biggest traps that people...,What's one of life's biggest traps that people...,Those pits with the spikes at the bottom,0.268941,0.731059,True,0.500938,0.499062,0,1
2,random--valid;(st),Absolutely. I do appreciate the fact he was ho...,Literally,"Question for the men, what's the worst part of...",The constant burning desire to use it on women,0.268941,0.731059,True,0.532638,0.467362,0,1
3,random--valid;(st),Save Money.,e * i * PI.,They dress like and try to act like they are g...,"...Merle Haggard, Lefty Frizzell, Travis Tritt...",6e-06,0.999994,True,0.524749,0.475251,0,1
1,random--valid;(st),When i was 15 i asked google how to get high w...,Top of the mornin,Met up with this guy from Tinder. After talkin...,r/suspiciouslyspecific,0.000335,0.999665,True,0.56618,0.43382,0,1
2,!copy--valid;(st),Sit up straight,What should one do in their 20s to avoid regre...,What should one do in their 20s to avoid regre...,Sit up straight,0.268941,0.731059,True,0.50013,0.49987,0,1


array([[0.05172537, 0.2616654 , 0.16059825, ..., 0.18875706, 0.09090954,
        0.0569493 ],
       [0.0667818 , 0.23610862, 0.17338227, ..., 0.21642873, 0.08729238,
        0.13777624],
       [0.05132895, 0.26178664, 0.15636002, ..., 0.20011243, 0.0822215 ,
        0.1014479 ],
       [0.04436886, 0.2747853 , 0.15668908, ..., 0.2243459 , 0.09403652,
        0.10423953]], dtype=float32)

array([[0.10485673, 0.24969472, 0.14840814, ..., 0.22476393, 0.07986017,
        0.12184577],
       [0.03457205, 0.23018454, 0.20052108, ..., 0.20018904, 0.10036353,
        0.07910628],
       [0.07416499, 0.27039415, 0.15739883, ..., 0.21843106, 0.08888397,
        0.1253656 ],
       [0.08307   , 0.25807709, 0.1745292 , ..., 0.21982375, 0.09519888,
        0.11396834]], dtype=float32)

array([[0.10809328, 0.24856174, 0.18278542, ..., 0.2267617 , 0.11335384,
        0.13215177],
       [0.03338067, 0.2578455 , 0.159392  , ..., 0.18650009, 0.10126952,
        0.07427056],
       [0.08723674, 0.22950628, 0.17995746, ..., 0.23410955, 0.08082322,
        0.11344848],
       [0.1221833 , 0.2955345 , 0.14812267, ..., 0.23145905, 0.09965942,
        0.1166001 ]], dtype=float32)

array([[0.06975555, 0.26773864, 0.17263907, ..., 0.2291256 , 0.10721157,
        0.10563242],
       [0.07576317, 0.29177386, 0.15481092, ..., 0.23010202, 0.10351443,
        0.10437338],
       [0.03065643, 0.24088725, 0.14844838, ..., 0.17939828, 0.04985744,
        0.13287757],
       [0.09849106, 0.26060215, 0.1770672 , ..., 0.2311436 , 0.08980531,
        0.153997  ]], dtype=float32)

array([[0.13462913, 0.27306882, 0.16230515, ..., 0.24041635, 0.09097393,
        0.13100082],
       [0.07982985, 0.26144907, 0.17884693, ..., 0.23426087, 0.09365927,
        0.13433553],
       [0.12603833, 0.2994165 , 0.14204693, ..., 0.20726651, 0.07357101,
        0.07988702],
       [0.10634112, 0.23392053, 0.16729814, ..., 0.23599091, 0.0597135 ,
        0.13610387]], dtype=float32)

array([[0.04451372, 0.26753968, 0.14872947, ..., 0.2072586 , 0.05033529,
        0.12699212],
       [0.10094371, 0.27681297, 0.15124492, ..., 0.20351683, 0.08930163,
        0.04914492],
       [0.101463  , 0.24599555, 0.20501748, ..., 0.21281101, 0.08309069,
        0.15020776],
       [0.07212768, 0.24315128, 0.17190704, ..., 0.20010717, 0.07074664,
        0.1240553 ]], dtype=float32)

array([[0.04088778, 0.22220714, 0.18750073, ..., 0.2110226 , 0.10171761,
        0.09561012],
       [0.07189051, 0.25865635, 0.19133043, ..., 0.21333188, 0.11329992,
        0.09859306],
       [0.10516229, 0.26286185, 0.1343041 , ..., 0.24138553, 0.03868827,
        0.16120382],
       [0.12646155, 0.2659252 , 0.15781476, ..., 0.21670517, 0.083362  ,
        0.12453119]], dtype=float32)

array([[0.10278735, 0.261392  , 0.19462354, ..., 0.19629607, 0.10508237,
        0.09148873],
       [0.01169097, 0.21965125, 0.18232442, ..., 0.16835526, 0.07425379,
        0.11272497],
       [0.13274528, 0.27420542, 0.17499395, ..., 0.23856232, 0.09475509,
        0.11544965],
       [0.10445559, 0.2716134 , 0.19586547, ..., 0.23201224, 0.11532611,
        0.10299481]], dtype=float32)

array([[0.04515429, 0.20944549, 0.16893414, ..., 0.23550043, 0.0762472 ,
        0.1522683 ],
       [0.1391955 , 0.27980894, 0.18513805, ..., 0.21002716, 0.085454  ,
        0.10228127],
       [0.10004909, 0.23407501, 0.1893128 , ..., 0.23639928, 0.08314568,
        0.13458422],
       [0.07630298, 0.2505793 , 0.17952417, ..., 0.23999168, 0.1128565 ,
        0.08795626]], dtype=float32)

array([[0.05608737, 0.25853637, 0.1607169 , ..., 0.24087219, 0.1031827 ,
        0.12489446],
       [0.03183381, 0.22652684, 0.17239109, ..., 0.21221349, 0.09734751,
        0.1003384 ],
       [0.06282149, 0.23636988, 0.16687582, ..., 0.21612741, 0.09005637,
        0.11346034],
       [0.10946818, 0.27028346, 0.18167287, ..., 0.23133129, 0.08723096,
        0.15608232]], dtype=float32)

In [25]:
# Train/evaluate model version 7 up to epoch 30 without forcing a restart.
# NOTE(review): with force_restart=False this appears to resume from the
# newest checkpoint on disk (the captured output continues at epoch 12 after
# reporting harness_v7_lg_11.pkl) -- confirm against full_run's definition.
full_run(num_epochs=30, version=7, force_restart=False)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- T

('harness_v7_lg_11.pkl', 11) 12
epoch 12 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8316 loss: 0.3218663605826943
train val: 0.7286 loss: 0.4963195043582935
test: 0.6769 loss: 0.6066228310704231
epoch time: 11.89min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_11.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_11.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 13 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83465 loss: 0.3197351923821838
train val: 0.725 loss: 0.4833948339025839
test: 0.7125 loss: 0.6103091636538506
epoch time: 12.43min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_12.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_12.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 14 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8253 loss: 0.3337356517088352
train val: 0.732 loss: 0.46976738295692194
test: 0.6804 loss: 0.6096486610293388
epoch time: 11.68min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_13.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_13.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 15 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8269 loss: 0.3343554809068167
train val: 0.7261 loss: 0.4772868040919591
test: 0.6713 loss: 0.6143515691936016
epoch time: 12.09min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_14.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_14.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 16 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83015 loss: 0.36674712454255515
train val: 0.7288 loss: 0.48026414904925185
test: 0.6713 loss: 0.6200244775831699
epoch time: 10.09min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_15.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_15.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 17 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.61805 loss: 0.9263598433961749
train val: 0.581 loss: 0.6540724976360798
test: 0.5816 loss: 0.6883665808439254
epoch time: 10.31min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_16.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_16.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 18 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6564 loss: 1.7823934301786364
train val: 0.4192 loss: 1.2139735162496568
test: 0.4283 loss: 1.9945186079621315
epoch time: 11.72min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_17.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_17.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 19 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.62945 loss: 0.7580428565181792
train val: 0.6746 loss: 0.929365025022626
test: 0.5405 loss: 2.42289126855582
epoch time: 12.06min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_18.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_18.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 20 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.72855 loss: 0.5271372699208092
train val: 0.6764 loss: 0.7985412669675425
test: 0.6161 loss: 0.9256462417945266
epoch time: 11.82min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_19.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_19.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 21 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.74455 loss: 0.4658419504993595
train val: 0.6754 loss: 0.6029574700646161
test: 0.4988 loss: 0.8773100112378597
epoch time: 9.85min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_20.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_20.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 22 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7774 loss: 0.4301696474901866
train val: 0.693 loss: 0.6318229826723458
test: 0.5243 loss: 0.9129233436137438
epoch time: 9.65min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_21.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_21.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 23 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.80135 loss: 0.3665015610134171
train val: 0.7178 loss: 0.5342864612337959
test: 0.6439 loss: 0.6767418700605631
epoch time: 11.36min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_22.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_22.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 24 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.82515 loss: 0.3406749350453927
train val: 0.7165 loss: 0.48628076610416177
test: 0.6718 loss: 0.60782540615201
epoch time: 12.54min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_23.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_23.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 25 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83215 loss: 0.3290070270499995
train val: 0.7364 loss: 0.46816060961205513
test: 0.6638 loss: 0.6143329051315785
epoch time: 12.10min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_24.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_24.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 26 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.80445 loss: 0.35264602242560017
train val: 0.719 loss: 0.48822536666724775
test: 0.6537 loss: 0.6270110027849675
epoch time: 12.06min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_25.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_25.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 27 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8326 loss: 0.31798327553434574
train val: 0.7327 loss: 0.4664560620496515
test: 0.6557 loss: 0.6194433892190456
epoch time: 17.37min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_26.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_26.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 28 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83875 loss: 0.3099230536383884
train val: 0.7408 loss: 0.5179823271461675
test: 0.6335 loss: 0.7020590723961592
epoch time: 19.69min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_27.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_27.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667
epoch 29 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8376 loss: 0.31862424078370716
train val: 0.7377 loss: 0.46788969291959565
test: 0.6104 loss: 0.6495328749239445
epoch time: 19.36min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_6.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_6.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best train: 0.86765
best train val: 0.7702
best test: 0.6667


test val:   0%|          | 0/10000 [00:00<?, ?it/s]

test val: {'accuracy': 0.619} loss: 0.6504103107094765
correct


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
0,!copy--valid;(st),Those pits with the spikes at the bottom,What's one of life's biggest traps that people...,What's one of life's biggest traps that people...,Those pits with the spikes at the bottom,0.268941,0.731059,True,0.280542,0.719458,1,1
1,*valid--!copy;(st),"How do men feel about the ""men will fuck anyth...","Not really positive, I’d need to fuck it first.","Not really positive, I’d need to fuck it first.","How do men feel about the ""men will fuck anyth...",0.880797,0.119203,False,0.715648,0.284352,0,0
0,random--valid;(st),Once is better than twice,"Going too fast and coming out, hitting the buf...",What is 'hands down' the best Ice-cream flavour?,Pistacchio,0.0,1.0,True,0.24504,0.75496,1,1
1,random--valid;(st),When i was 15 i asked google how to get high w...,Top of the mornin,Met up with this guy from Tinder. After talkin...,r/suspiciouslyspecific,0.000335,0.999665,True,0.267298,0.732702,1,1
2,!copy--valid;(st),Sit up straight,What should one do in their 20s to avoid regre...,What should one do in their 20s to avoid regre...,Sit up straight,0.268941,0.731059,True,0.282741,0.717259,1,1


incorrect


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
2,random--valid;(st),Absolutely. I do appreciate the fact he was ho...,Literally,"Question for the men, what's the worst part of...",The constant burning desire to use it on women,0.268941,0.731059,True,0.743424,0.256576,0,1
3,random--valid;(st),Save Money.,e * i * PI.,They dress like and try to act like they are g...,"...Merle Haggard, Lefty Frizzell, Travis Tritt...",6e-06,0.999994,True,0.690412,0.309588,0,1
3,random--valid;(st),Yikes. This is how you go broke. Buying cars j...,Not 1 to 10 cause it’s a negative number,"Wow, I've never heard of any of this. And BTW,...",Yup. It’s ok if ADULTS want circumcision or la...,0.268941,0.731059,True,0.561384,0.438616,0,1
0,*valid--random;(st),What’s your controversial food opinion?,"I like chocolate candy enough, but i'll usuall...","""consent"" is the lack of disagreement. By defi...","Yeah, I agree with you 100%. The sad part is t...",0.731059,0.268941,False,0.36715,0.63285,1,0
3,*valid--!copy;(st),What's a women's thing men should absolutely s...,"“Housework” type life-skills like sewing, cook...","“Housework” type life-skills like sewing, cook...",What's a women's thing men should absolutely s...,0.731059,0.268941,False,0.405519,0.594481,1,0


array([[ 0.63208026,  0.02971917, -0.8655138 , ...,  0.77816254,
        -0.78798854,  0.07542356],
       [ 0.63125205,  0.02959946, -0.86405826, ...,  0.7753299 ,
        -0.7896352 ,  0.07494098],
       [ 0.62970406,  0.03009954, -0.85970324, ...,  0.7678277 ,
        -0.7940083 ,  0.07306255],
       [ 0.62945324,  0.03029537, -0.8586474 , ...,  0.76620984,
        -0.79496247,  0.0725788 ]], dtype=float32)

array([[ 0.6304852 ,  0.02969232, -0.8623169 , ...,  0.7721467 ,
        -0.79149383,  0.0742398 ],
       [ 0.6322219 ,  0.02975531, -0.8657384 , ...,  0.77861   ,
        -0.78773075,  0.07548785],
       [ 0.632756  ,  0.02992215, -0.8665465 , ...,  0.7802305 ,
        -0.78680474,  0.07569592],
       [ 0.63150424,  0.0296172 , -0.86453205, ...,  0.7762409 ,
        -0.789107  ,  0.07510808]], dtype=float32)

array([[ 0.6308732 ,  0.02961412, -0.86326677, ...,  0.7738519 ,
        -0.7904992 ,  0.07463566],
       [ 0.63046825,  0.02969744, -0.86227083, ...,  0.77206606,
        -0.79154086,  0.07421996],
       [ 0.63154393,  0.02962121, -0.86460537, ...,  0.77637935,
        -0.7890227 ,  0.07513541],
       [ 0.6314123 ,  0.02960822, -0.86436427, ...,  0.7759142 ,
        -0.78929406,  0.0750515 ]], dtype=float32)

array([[ 0.629475  ,  0.03027765, -0.85874164, ...,  0.7663516 ,
        -0.7948781 ,  0.07262163],
       [ 0.6294989 ,  0.03025841, -0.8588442 , ...,  0.76650673,
        -0.79478633,  0.07266857],
       [ 0.62956184,  0.03020836, -0.8591121 , ...,  0.766914  ,
        -0.7945452 ,  0.072791  ],
       [ 0.631897  ,  0.02967867, -0.8652142 , ...,  0.7775715 ,
        -0.7883315 ,  0.0753327 ]], dtype=float32)

array([[ 0.63212216,  0.02972974, -0.86558115, ...,  0.77829695,
        -0.78791106,  0.07544301],
       [ 0.6309814 ,  0.02960441, -0.8635039 , ...,  0.7742895 ,
        -0.7902433 ,  0.07472984],
       [ 0.6318291 ,  0.02966508, -0.8651009 , ...,  0.7773468 ,
        -0.7884608 ,  0.0752979 ],
       [ 0.6336504 ,  0.03027808, -0.86779755, ...,  0.782762  ,
        -0.78540075,  0.0759365 ]], dtype=float32)

array([[ 0.6303296 ,  0.0297453 , -0.86188084, ...,  0.7713893 ,
        -0.79193443,  0.07404995],
       [ 0.6306972 ,  0.02964074, -0.86285675, ...,  0.7731063 ,
        -0.79093444,  0.07446814],
       [ 0.6321327 ,  0.02973243, -0.8655982 , ...,  0.77833074,
        -0.7878914 ,  0.07544802],
       [ 0.6319434 ,  0.029688  , -0.8652916 , ...,  0.7777224 ,
        -0.78824276,  0.07535724]], dtype=float32)

array([[ 0.63027716,  0.02976605, -0.8617264 , ...,  0.7711252 ,
        -0.79208803,  0.07398187],
       [ 0.6296586 ,  0.03013363, -0.8595165 , ...,  0.7675371 ,
        -0.79417866,  0.07297672],
       [ 0.6317902 ,  0.02965796, -0.8650355 , ...,  0.7772183 ,
        -0.7885351 ,  0.07527722],
       [ 0.631381  ,  0.02960585, -0.86430544, ...,  0.77580106,
        -0.78935987,  0.07503057]], dtype=float32)

array([[ 0.6320891 ,  0.02972127, -0.8655277 , ...,  0.7781906 ,
        -0.78797203,  0.07542776],
       [ 0.63083607,  0.02961862, -0.86318225, ...,  0.7736973 ,
        -0.7905892 ,  0.07460173],
       [ 0.6318703 ,  0.02967309, -0.86517036, ...,  0.7774838 ,
        -0.78838146,  0.07531975],
       [ 0.6298625 ,  0.02998879, -0.86032414, ...,  0.76880944,
        -0.79343504,  0.07334781]], dtype=float32)

array([[ 0.63005626,  0.02987203, -0.86101973, ...,  0.7699399 ,
        -0.79277706,  0.07366505],
       [ 0.63185006,  0.02966933, -0.86513627, ...,  0.7774174 ,
        -0.7884205 ,  0.0753087 ],
       [ 0.63085365,  0.02961643, -0.8632226 , ...,  0.77377075,
        -0.7905463 ,  0.07461797],
       [ 0.62971985,  0.030088  , -0.8597668 , ...,  0.76792705,
        -0.7939502 ,  0.07309181]], dtype=float32)

array([[ 0.63005203,  0.02987439, -0.8610049 , ...,  0.76991546,
        -0.7927915 ,  0.07365836],
       [ 0.6317439 ,  0.02964981, -0.8649564 , ...,  0.7770627 ,
        -0.7886252 ,  0.07525201],
       [ 0.63119847,  0.02959836, -0.8639529 , ...,  0.77513003,
        -0.789752  ,  0.07490178],
       [ 0.63110656,  0.0295988 , -0.8637668 , ...,  0.77477986,
        -0.7899567 ,  0.07483163]], dtype=float32)

In [26]:
# Run training for version 0 with num_epochs=30.
# NOTE(review): force_restart=False presumably continues from this version's
# existing checkpoints instead of starting over — confirm against full_run.
# NOTE(review): the recorded output of this cell ends at the start of epoch 10
# with "OSError: [Errno 28] No space left on device". The output does not show
# which device filled up; check free disk space before re-running this cell.
full_run(num_epochs=30, version=0, force_restart=False)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- T

None 0
epoch 0 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.54315 loss: 3.5147343883627182
train val: 0.4324 loss: 1.780831069996953
test: 0.5075 loss: 1.5715440049499274
epoch time: 19.12min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_0.pkl
best train: 0.54315
best train val: 0.4324
best test: 0.5075
epoch 1 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5034 loss: 1.6662567906881682
train val: 0.666 loss: 0.6149761132508517
test: 0.4926 loss: 0.8193769837677479
epoch time: 22.72min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.5034
best train val: 0.666
best test: 0.4926
epoch 2 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5932 loss: 1.0946836214133537
train val: 0.6654 loss: 0.5613915847253055
test: 0.5871 loss: 0.7020287726283073
epoch time: 24.00min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.5034
best train val: 0.666
best test: 0.4926
epoch 3 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6976 loss: 0.6886984714076155
train val: 0.6783 loss: 0.6680274018595169
test: 0.6124 loss: 0.8057574693717062
epoch time: 24.59min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_3.pkl
best train: 0.6976
best train val: 0.6783
best test: 0.6124
epoch 4 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.73415 loss: 0.5340914635046021
train val: 0.705 loss: 0.5449348812529875
test: 0.6062 loss: 0.6889625865072012
epoch time: 24.10min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_4.pkl
best train: 0.73415
best train val: 0.705
best test: 0.6062
epoch 5 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8005 loss: 0.39448593816944005
train val: 0.6869 loss: 0.5391181374797132
test: 0.664 loss: 0.6108955920398236
epoch time: 24.19min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_4.pkl
best train: 0.73415
best train val: 0.705
best test: 0.6062
epoch 6 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.814 loss: 0.361622228161893
train val: 0.7149 loss: 0.53299231589392
test: 0.632 loss: 0.6553070891678333
epoch time: 24.12min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_0.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_0.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_6.pkl
best train: 0.814
best train val: 0.7149
best test: 0.632
epoch 7 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7882 loss: 0.6471550602907807
train val: 0.6926 loss: 0.600301737936354
test: 0.5934 loss: 0.7255660030975938
epoch time: 23.83min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_2.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_2.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_6.pkl
best train: 0.814
best train val: 0.7149
best test: 0.632
epoch 8 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.81005 loss: 0.3829603927039773
train val: 0.7011 loss: 0.5018820243503432
test: 0.5993 loss: 0.6633563963532447
epoch time: 23.89min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_1.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_6.pkl
best train: 0.814
best train val: 0.7149
best test: 0.632
epoch 9 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83565 loss: 0.3272408008855768
train val: 0.7 loss: 0.5112185107074678
test: 0.6345 loss: 0.6393793481111526
epoch time: 23.73min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_3.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_3.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_6.pkl
best train: 0.814
best train val: 0.7149
best test: 0.632
epoch 10 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

OSError: [Errno 28] No space left on device

In [None]:
# Remaining staged training schedule, executed in the same order as the
# original one-call-per-cell sequence: first version 5 at num_epochs=30, then
# versions -1, 7, 0, 5 at num_epochs=60, 90 and 120.
# NOTE(review): force_restart=False presumably lets each call continue from
# that version's existing checkpoints — confirm against full_run.
VERSION_ORDER = (-1, 7, 0, 5)
REMAINING_SCHEDULE = [(30, (5,))] + [
    (num_epochs, VERSION_ORDER) for num_epochs in (60, 90, 120)
]

# One call per (num_epochs, version) pair; an exception in any run stops the
# loop, just as a failing cell stops a "Run All" of the separate cells.
for num_epochs, versions in REMAINING_SCHEDULE:
    for version in versions:
        full_run(num_epochs=num_epochs, version=version, force_restart=False)