In [1]:
# in a terminal run
# > USER_PATH=/home/krause/userdata/ make run-redis NS=train
# > USER_PATH=/home/krause/userdata/ make run-redis NS=test
# to allow access to the train and test namespaces

In [2]:
import os
import sys
import json
import pandas as pd
import numpy as np
from typing import Literal, TypedDict

In [3]:
sys.path.append("..")
os.environ["USER_PATH"] = "/home/krause/userdata/"
MODEL_OUTPUT_BASE = "/mnt/d/workspace/clotho/notebooks"
MODEL_OUTPUT_CP = os.path.join(MODEL_OUTPUT_BASE, "checkpoints")

In [4]:
from misc.redis import set_redis_slow_mode
from misc.util import highest_number
from misc.io import open_write
from model.datagenerator import create_train_test
from model.transformer_embed import (
    get_epoch_and_load,
    limit_epoch_data,
    limit_epoch_data,
    get_model_filename,
)
from system.namespace.store import get_namespace

In [5]:
import torch

is_cuda = torch.cuda.is_available()
is_cuda

True

In [6]:
set_redis_slow_mode("never")
ns_test = get_namespace("test")
ns_train = get_namespace("train")
now = pd.Timestamp("2022-12-17", tz="UTC")
train_plan = [
    {
        "left": {"mode": "valid", "flip_pc": 1.0},
        "right": {"mode": "valid", "flip_pc": 0.0},
        "min_text_length": 20,
        "skip_weak": True,
        "skip_topics": True,
        "flip_lr": 0.5,
        "first_epoch": 10,
        "last_epoch": None,
        "weight": 50,
    },
    {
        "left": {"mode": "valid", "flip_pc": 1.0},
        "right": {"mode": "valid", "flip_pc": 0.0},
        "min_text_length": None,
        "skip_weak": True,
        "skip_topics": True,
        "flip_lr": 0.5,
        "first_epoch": 10,
        "last_epoch": None,
        "weight": 50,
    },
    {
        "left": {"mode": "random", "flip_pc": 0.0},
        "right": {"mode": "path", "flip_pc": 0.0},
        "min_text_length": 20,
        "skip_weak": True,
        "skip_topics": True,
        "flip_lr": 0.5,
        "first_epoch": None,
        "last_epoch": None,
        "weight": 60,
    },
    {
        "left": None,
        "right": {"mode": "path", "flip_pc": 0.0},
        "min_text_length": 20,
        "skip_weak": True,
        "skip_topics": True,
        "flip_lr": 0.5,
        "first_epoch": None,
        "last_epoch": None,
        "weight": 40,
    },
     {
        "left": {"mode": "random", "flip_pc": 0.0},
        "right": {"mode": "path", "flip_pc": 0.0},
        "min_text_length": None,
        "skip_weak": True,
        "skip_topics": True,
        "flip_lr": 0.5,
        "first_epoch": 5,
        "last_epoch": None,
        "weight": 60,
    },
    {
        "left": None,
        "right": {"mode": "path", "flip_pc": 0.0},
        "min_text_length": None,
        "skip_weak": True,
        "skip_topics": True,
        "flip_lr": 0.5,
        "first_epoch": 5,
        "last_epoch": None,
        "weight": 40,
    },
    {
        "left": {"mode": "random", "flip_pc": 0.0},
        "right": {"mode": "valid", "flip_pc": 0.0},
        "min_text_length": None,
        "skip_weak": True,
        "skip_topics": True,
        "flip_lr": 0.5,
        "first_epoch": None,
        "last_epoch": None,
        "weight": 60,
    },
    {
        "left": None,
        "right": {"mode": "valid", "flip_pc": 0.0},
        "min_text_length": None,
        "skip_weak": True,
        "skip_topics": True,
        "flip_lr": 0.5,
        "first_epoch": None,
        "last_epoch": None,
        "weight": 40,
    },
    {
        "left": {"mode": "valid", "flip_pc": 1.0},
        "right": {"mode": "valid", "flip_pc": 0.0},
        "min_text_length": None,
        "skip_weak": True,
        "skip_topics": True,
        "flip_lr": 0.5,
        "first_epoch": 15,
        "last_epoch": None,
        "weight": 50,
    }
]
eval_plan = [
    {
        "left": {"mode": "random", "flip_pc": 0.0},
        "right": {"mode": "valid", "flip_pc": 0.0},
        "min_text_length": 20,
        "skip_weak": True,
        "skip_topics": True,
        "flip_lr": 0.5,
        "weight": 60,
    },
    {
        "left": None,
        "right": {"mode": "valid", "flip_pc": 0.0},
        "min_text_length": 20,
        "skip_weak": True,
        "skip_topics": True,
        "flip_lr": 0.5,
        "weight": 40,
    },
    {
        "left": {"mode": "random", "flip_pc": 0.0},
        "right": {"mode": "valid", "flip_pc": 0.0},
        "min_text_length": None,
        "skip_weak": True,
        "skip_topics": True,
        "flip_lr": 0.5,
        "weight": 60,
    },
    {
        "left": None,
        "right": {"mode": "valid", "flip_pc": 0.0},
        "min_text_length": None,
        "skip_weak": True,
        "skip_topics": True,
        "flip_lr": 0.5,
        "weight": 40,
    },
]
ttgen = create_train_test(
    train_ns=ns_train,
    train_validation_ns=ns_train,
    test_ns=ns_test,
    test_validation_ns=ns_test,
    train_learning_plan=train_plan,
    train_val_learning_plan=eval_plan,
    test_learning_plan=eval_plan,
    test_val_learning_plan=eval_plan,
    batch_size=4 if is_cuda else 8,
    epoch_batches=5000 if is_cuda else 500,
    train_val_size=10000 if is_cuda else 1000,
    test_size=10000 if is_cuda else 1000,
    test_val_size=10000 if is_cuda else 1000,
    compute_batch_size=100 if is_cuda else 100,
    now=now)

In [7]:
import torch.nn as nn
from torch.optim import AdamW
from transformers import DistilBertTokenizer, DistilBertModel

In [8]:
device = torch.device("cuda") if is_cuda else torch.device("cpu")
device

device(type='cuda')

In [9]:
ProviderRole = Literal["child", "parent"]

tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
EMBED_SIZE = 768

TokenizedInput = TypedDict('TokenizedInput', {
    "input_ids": torch.Tensor,
    "attention_mask": torch.Tensor,
})


AggType = Literal["cls", "mean"]
AGG_CLS: AggType = "cls"
AGG_MEAN: AggType = "mean"


def tokens(texts: list[str]) -> TokenizedInput:
    res = tokenizer(texts.tolist(), return_tensors="pt", padding=True, truncation=True)
    return {k: v.to(device) for k, v in res.items()}


class Noise(nn.Module):
    def __init__(self, std: float = 1.0, p: float = 0.5) -> None:
        super().__init__()
        self._std = std
        self._p = p
        self._dhold = nn.Parameter(torch.Tensor([0.0]), requires_grad=False)

    def set_std(self, std: float) -> None:
        self._std = std

    def get_std(self) -> float:
        return self._std

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not self.training:
            return x
        prob = torch.rand(size=x.shape, device=self._dhold.device) < self._p
        gauss = torch.normal(
            mean=0.0, std=self._std, size=x.shape, device=self._dhold.device)
        return x + prob * gauss


class Model(nn.Module):
    def __init__(self, version: int) -> None:
        super().__init__()
        self._bert_parent = DistilBertModel.from_pretrained(
            "distilbert-base-uncased")
        self._bert_child = DistilBertModel.from_pretrained(
            "distilbert-base-uncased")
        if version in (1, 3, 4, 6):
            self._pdense: nn.Sequential | None = nn.Sequential(
                nn.Linear(EMBED_SIZE, EMBED_SIZE),
                nn.Dropout(p=0.2),
                nn.ReLU(),
                nn.Linear(EMBED_SIZE, EMBED_SIZE))
            self._cdense: nn.Sequential | None = nn.Sequential(
                nn.Linear(EMBED_SIZE, EMBED_SIZE),
                nn.Dropout(p=0.2),
                nn.ReLU(),
                nn.Linear(EMBED_SIZE, EMBED_SIZE))
        else:
            self._pdense = None
            self._cdense = None
        if version < 4 or version > 5:
            self._noise = None
        else:
            self._noise = Noise(std=1.0, p=0.2)
        if version < 2 or version > 4:
            self._cos = None
        else:
            self._cos = torch.nn.CosineSimilarity()
        if version < 6:
            self._agg = AGG_CLS
        else:
            self._agg = AGG_MEAN
        self._version = version

    def set_epoch(self, epoch: int) -> None:
        noise = self._noise
        if noise is not None:
            noise.set_std(1 / (1.2 ** epoch))

    def get_version(self) -> int:
        return self._version
    
    def get_agg(self, lhs: torch.Tensor) -> torch.Tensor:
        if self._agg == AGG_CLS:
            return lhs[:, 0]
        if self._agg == AGG_MEAN:
            return torch.mean(lhs, dim=1)
        raise ValueError(f"unknown aggregation: {self._agg}")

    def get_parent_embed(
            self,
            input_ids: torch.Tensor,
            attention_mask: torch.Tensor) -> torch.Tensor:
        outputs_parent = self._bert_parent(
            input_ids=input_ids, attention_mask=attention_mask)
        out = self.get_agg(outputs_parent.last_hidden_state)
        if self._pdense is not None:
            out = self._pdense(out)
        if self._noise is not None:
            out = self._noise(out)
        return out

    def get_child_embed(
            self,
            input_ids: torch.Tensor,
            attention_mask: torch.Tensor) -> torch.Tensor:
        outputs_child = self._bert_child(
            input_ids=input_ids, attention_mask=attention_mask)
        out = self.get_agg(outputs_child.last_hidden_state)
        if self._cdense is not None:
            out = self._cdense(out)
        if self._noise is not None:
            out = self._noise(out)
        return out

    def forward(self, x: dict[ProviderRole, TokenizedInput]) -> torch.Tensor:
        parent_cls = self.get_parent_embed(
            input_ids=x["parent"]["input_ids"],
            attention_mask=x["parent"]["attention_mask"])
        child_cls = self.get_child_embed(
            input_ids=x["child"]["input_ids"],
            attention_mask=x["child"]["attention_mask"])
        if self._cos is not None:
            return self._cos(parent_cls, child_cls).reshape([-1, 1])
        batch_size = parent_cls.shape[0]
        return torch.bmm(
            parent_cls.reshape([batch_size, 1, -1]),
            child_cls.reshape([batch_size, -1, 1])).reshape([-1, 1])


class TrainingHarness(nn.Module):
    def __init__(self, model: Model) -> None:
        super().__init__()
        self._model = model
        self._softmax = nn.Softmax(dim=1)
        self._loss = nn.BCELoss()

    def get_version(self) -> int:
        return self._model.get_version()

    def forward(
            self,
            left: TokenizedInput,
            right: TokenizedInput,
            labels: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        out_left = self._model(left)
        out_right = self._model(right)
        preds = self._softmax(torch.hstack((out_left, out_right)))
        return preds, self._loss(preds, labels)

In [10]:
from transformers import get_scheduler
# from tqdm.notebook import tqdm
from tqdm.auto import tqdm
import evaluate
import time


def compute(harness, df):
    plefts = tokens(df["parent_left"])
    clefts = tokens(df["child_left"])
    prights = tokens(df["parent_right"])
    crights = tokens(df["child_right"])
    labels = torch.tensor(
        [~df["correct_is_right"], df["correct_is_right"]],
        dtype=torch.float32).T.to(device)
    return harness(
        left={"parent": plefts, "child": clefts},
        right={"parent": prights, "child": crights},
        labels=labels)


def run_training(num_epochs, version, force_restart):
    model = Model(version=version)
    model.to(device)
    harness = TrainingHarness(model)
    harness.to(device)

    mprev, epoch_offset = get_epoch_and_load(
        harness,
        MODEL_OUTPUT_CP,
        ftype="harness",
        is_cuda=is_cuda,
        device=device,
        force_restart=force_restart)

    optimizer = AdamW(harness.parameters(), lr=5e-5)
    print(mprev, epoch_offset)
    
    num_epochs -= epoch_offset
    if num_epochs <= 0:
        print("already computed all epochs. nothing to do!")
        return model, harness, optimizer
    
    num_training_steps = num_epochs * ttgen.get_epoch_train_size()
    warmup = 10000 if is_cuda else 10
    lr_scheduler = get_scheduler(
        name="linear",
        optimizer=optimizer,
        num_warmup_steps=warmup,
        num_training_steps=num_training_steps - warmup)
    ttgen.set_epoch(epoch_offset)
    
    log_csv = get_model_filename(
        harness,
        MODEL_OUTPUT_BASE,
        is_cuda=is_cuda,
        ftype="val_log",
        epoch=None,
        ext=".csv")
    columns = [
        "epoch",
        "train_acc",
        "train_loss",
        "train_val_acc",
        "train_val_loss",
        "test_acc",
        "test_loss",
        "time",
        "version",
        "fname",
    ]
    if not os.path.exists(log_csv):
        pd.DataFrame([], columns=columns).to_csv(
            log_csv, header=True, mode="w", columns=columns)

    for _ in range(num_epochs):
        epoch = ttgen.get_epoch()
        print(f"epoch {epoch} version: {harness.get_version()}")
        real_time = time.monotonic()

        model.train()
        harness.train()
        model.set_epoch(epoch)
        metric_train = evaluate.load("accuracy")
        train_loss = []
        first = True
        with tqdm(desc="train", total=ttgen.get_epoch_train_size()) as progress_bar:
            for train_df in ttgen.train_dfs():
                preds, loss = compute(harness, train_df)
                train_loss.append(loss.item())
                loss.backward()

                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()
                progress_bar.update(train_df.shape[0])

                predictions = torch.argmax(preds, dim=-1)
                metric_train.add_batch(
                    predictions=predictions,
                    references=train_df["correct_is_right"].astype(int))
                if first:
                    # display(train_df)
                    first = False

        model_fname = get_model_filename(
            harness,
            MODEL_OUTPUT_CP,
            is_cuda=is_cuda,
            ftype="harness",
            epoch=epoch)
        torch.save(harness.state_dict(), model_fname)

        model.eval()
        harness.eval()
        with torch.no_grad():
            metric_val_train = evaluate.load("accuracy")
            train_val_loss = []
            with tqdm(desc="train val", total=ttgen.get_epoch_train_validation_size()) as progress_bar:
                for train_validation_df in ttgen.train_validation_dfs():
                    preds, loss = compute(harness, train_validation_df)
                    train_val_loss.append(loss.item())
                    predictions = torch.argmax(preds, dim=-1)
                    metric_val_train.add_batch(
                        predictions=predictions,
                        references=train_validation_df["correct_is_right"].astype(int))
                    progress_bar.update(train_validation_df.shape[0])

            metric_test = evaluate.load("accuracy")
            test_loss = []
            with tqdm(desc="test", total=ttgen.get_epoch_test_size()) as progress_bar:
                for test_df in ttgen.test_dfs():
                    preds, loss = compute(harness, test_df)
                    test_loss.append(loss.item())
                    predictions = torch.argmax(preds, dim=-1)
                    metric_test.add_batch(
                        predictions=predictions,
                        references=test_df["correct_is_right"].astype(int))
                    progress_bar.update(test_df.shape[0])
            stats = {
                "epoch": int(epoch),
                "train_acc": float(metric_train.compute()['accuracy']),
                "train_loss": float(np.mean(train_loss)),
                "train_val_acc": float(metric_val_train.compute()['accuracy']),
                "train_val_loss": float(np.mean(train_val_loss)),
                "test_acc": float(metric_test.compute()['accuracy']),
                "test_loss": float(np.mean(test_loss)),
                "time": 0.0,
                "version": harness.get_version(),
                "fname": model_fname,
            }

        print(f"train: {stats['train_acc']} loss: {stats['train_loss']}")
        print(f"train val: {stats['train_val_acc']} loss: {stats['train_val_loss']}")
        print(f"test: {stats['test_acc']} loss: {stats['test_loss']}")
        ttgen.advance_epoch()
        stats["time"] = float((time.monotonic() - real_time) / 60.0)
        print(f"epoch time: {stats['time']:.2f}min")
        stats_fn = get_model_filename(
            harness,
            MODEL_OUTPUT_CP,
            is_cuda=is_cuda,
            ftype="stats",
            epoch=epoch,
            ext=".json")
        with open_write(stats_fn, text=True) as fout:
            print(json.dumps(stats, indent=2, sort_keys=True), file=fout)
        stats_df = pd.DataFrame(
            {key: [val] for key, val in stats.items()},
            columns=columns)
        stats_df.to_csv(
            log_csv, header=False, mode="a")
            
        limit_epoch_data(
            harness,
            MODEL_OUTPUT_CP,
            is_cuda=is_cuda,
            ftype="stats",
            ext=".json",
            count=5)
    return model, harness, optimizer

In [11]:
def save_model(model, harness, optimizer):
    torch.save(model.state_dict(), get_model_filename(
        harness,
        MODEL_OUTPUT_BASE,
        is_cuda=is_cuda,
        ftype="model",
        epoch=None))
    torch.save(harness.state_dict(), get_model_filename(
        harness,
        MODEL_OUTPUT_BASE,
        is_cuda=is_cuda,
        ftype="harness",
        epoch=None))
    torch.save(optimizer.state_dict(), get_model_filename(
        harness,
        MODEL_OUTPUT_BASE,
        is_cuda=is_cuda,
        ftype="optimizer",
        epoch=None))

In [12]:
def validation(model, harness):
    ttgen.reset()
    model.eval()
    harness.eval()
    dfs = []
    with torch.no_grad():
        metric_val_test = evaluate.load("accuracy")
        test_val_loss = []
        with tqdm(desc="test val", total=ttgen.get_epoch_test_validation_size()) as progress_bar:
            for test_val_df in ttgen.test_validation_dfs():
                preds, loss = compute(harness, test_val_df)
                test_val_loss.append(loss.item())
                predictions = torch.argmax(preds, dim=-1)
                metric_val_test.add_batch(
                    predictions=predictions,
                    references=test_val_df["correct_is_right"].astype(int))
                cur_df = test_val_df.copy()
                cur_df["logit_left"] = preds[:, 0].cpu()
                cur_df["logit_right"] = preds[:, 1].cpu()
                cur_df["preds"] = predictions.cpu()
                cur_df["truth"] = test_val_df["correct_is_right"].astype(int)
                dfs.append(cur_df)
                progress_bar.update(test_val_df.shape[0])
    print(f"test val: {metric_val_test.compute()} loss: {np.mean(test_val_loss)}")
    validation_df = pd.concat(dfs)
    validation_df.to_csv(get_model_filename(
        harness,
        MODEL_OUTPUT_BASE,
        is_cuda=is_cuda,
        ftype="validation",
        epoch=None,
        ext=".csv"))
    print("correct")
    display(validation_df[validation_df["preds"] == validation_df["truth"]].head())
    print("incorrect")
    display(validation_df[validation_df["preds"] != validation_df["truth"]].head())

In [13]:
def embeds(model):
    ttgen.reset()
    model.eval()
    with torch.no_grad():
        count = 0
        for test_val_df in ttgen.test_validation_dfs():
            plefts = tokens(test_val_df["parent_left"])
            clefts = tokens(test_val_df["child_left"])
            prights = tokens(test_val_df["parent_right"])
            crights = tokens(test_val_df["child_right"])
            display(model.get_child_embed(
                clefts["input_ids"],
                clefts["attention_mask"]).cpu().numpy())
            display(model.get_child_embed(
                crights["input_ids"],
                crights["attention_mask"]).cpu().numpy())
            count += 1
            if count >= 5:
                break

In [14]:
def full_run(*, num_epochs, version, force_restart):
    model, harness, optimizer = run_training(
        num_epochs, version, force_restart)
    save_model(model, harness, optimizer)
    validation(model, harness)
    embeds(model)

In [15]:
for version in range(8):
    full_run(num_epochs=30, version=version, force_restart=True)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- T

None 0
epoch 0 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.55935 loss: 3.352052914298071
train val: 0.5942 loss: 0.9736679400078952
test: 0.6044 loss: 0.9560981833364814
epoch time: 382.00min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_0.pkl
best train: 0.55935
best train val: 0.5942
best test: 0.6044
epoch 1 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.65155 loss: 0.8869360913270153
train val: 0.7179 loss: 0.5731456519750878
test: 0.6461 loss: 0.7169723329342902
epoch time: 13.21min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.65155
best train val: 0.7179
best test: 0.6461
epoch 2 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6355 loss: 1.0218310791182796
train val: 0.5845 loss: 0.6663006847500801
test: 0.5182 loss: 0.7017655182123185
epoch time: 13.14min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.65155
best train val: 0.7179
best test: 0.6461
epoch 3 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.54475 loss: 1.3130267222916707
train val: 0.6596 loss: 0.5963076619826257
test: 0.561 loss: 0.7149791924297809
epoch time: 13.45min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.65155
best train val: 0.7179
best test: 0.6461
epoch 4 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.67095 loss: 0.7214475464519579
train val: 0.6756 loss: 0.5346202725146897
test: 0.5576 loss: 0.6934523519396782
epoch time: 13.55min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.65155
best train val: 0.7179
best test: 0.6461
epoch 5 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.76895 loss: 0.46139734542614314
train val: 0.6768 loss: 0.5350437356937676
test: 0.5693 loss: 0.6968949237704277
epoch time: 13.51min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.65155
best train val: 0.7179
best test: 0.6461
epoch 6 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7961 loss: 0.4247380390111655
train val: 0.6874 loss: 0.5878600777112646
test: 0.5702 loss: 0.7324761573195457
epoch time: 13.16min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_2.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_2.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.65155
best train val: 0.7179
best test: 0.6461
epoch 7 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.81125 loss: 0.3938406159650538
train val: 0.6868 loss: 0.5700957158848643
test: 0.5654 loss: 0.7661647034734488
epoch time: 13.26min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_0.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_0.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.65155
best train val: 0.7179
best test: 0.6461
epoch 8 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.82225 loss: 0.34318834802054204
train val: 0.6895 loss: 0.5366566250034259
test: 0.5538 loss: 0.7296822218716145
epoch time: 13.40min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_3.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_3.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.65155
best train val: 0.7179
best test: 0.6461
epoch 9 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83355 loss: 0.3647210231402678
train val: 0.6978 loss: 0.5280818137230352
test: 0.566 loss: 0.732627209085226
epoch time: 13.67min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_4.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_4.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.65155
best train val: 0.7179
best test: 0.6461
epoch 10 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8256 loss: 0.342054622005063
train val: 0.7155 loss: 0.49610226929080675
test: 0.5969 loss: 0.6681984926581382
epoch time: 13.41min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_5.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_5.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.65155
best train val: 0.7179
best test: 0.6461
epoch 11 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.78525 loss: 0.7765342488288921
train val: 0.7054 loss: 0.5095887706808367
test: 0.5959 loss: 0.6755767182707787
epoch time: 13.87min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_7.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_7.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.65155
best train val: 0.7179
best test: 0.6461
epoch 12 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8275 loss: 0.365783574446039
train val: 0.7132 loss: 0.5009044643007219
test: 0.5968 loss: 0.6688777284383773
epoch time: 13.57min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_6.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_6.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best train: 0.65155
best train val: 0.7179
best test: 0.6461
epoch 13 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83435 loss: 0.35041186180261064
train val: 0.7227 loss: 0.5615974120778264
test: 0.5811 loss: 0.6910121179521084
epoch time: 13.62min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_8.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_8.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_13.pkl
best train: 0.83435
best train val: 0.7227
best test: 0.5811
epoch 14 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8354 loss: 0.3965080950863377
train val: 0.7262 loss: 0.5484570084583363
test: 0.5876 loss: 0.6957989581763744
epoch time: 13.66min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_9.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_9.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_14.pkl
best train: 0.8354
best train val: 0.7262
best test: 0.5876
epoch 15 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83485 loss: 0.39182786592188773
train val: 0.731 loss: 0.4888885040677269
test: 0.6053 loss: 0.6751552597522735
epoch time: 13.88min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_11.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_11.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_15.pkl
best train: 0.83485
best train val: 0.731
best test: 0.6053
epoch 16 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83255 loss: 0.6108165907337845
train val: 0.7305 loss: 0.5062134432052727
test: 0.603 loss: 0.6977322362244129
epoch time: 13.84min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_12.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_12.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_15.pkl
best train: 0.83485
best train val: 0.731
best test: 0.6053
epoch 17 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8401 loss: 0.3713691776767191
train val: 0.7251 loss: 0.49837933776173743
test: 0.6221 loss: 0.6784345151007175
epoch time: 13.77min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_10.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_10.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_15.pkl
best train: 0.83485
best train val: 0.731
best test: 0.6053
epoch 18 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.84665 loss: 0.36494319473185927
train val: 0.7357 loss: 0.4863825129638892
test: 0.624 loss: 0.6481490107417107
epoch time: 13.84min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_1.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_1.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_18.pkl
best train: 0.84665
best train val: 0.7357
best test: 0.624
epoch 19 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.84995 loss: 0.30079699465904075
train val: 0.7311 loss: 0.562404506408464
test: 0.5822 loss: 0.7214083472967148
epoch time: 13.70min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_13.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_13.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_18.pkl
best train: 0.84665
best train val: 0.7357
best test: 0.624
epoch 20 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8477 loss: 0.29592661397925585
train val: 0.7339 loss: 0.48284582288526
test: 0.6145 loss: 0.6618737086594105
epoch time: 13.72min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_17.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_17.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_18.pkl
best train: 0.84665
best train val: 0.7357
best test: 0.624
epoch 21 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.85095 loss: 0.5730828672718846
train val: 0.7406 loss: 0.48914581016413866
test: 0.6129 loss: 0.6623505784809589
epoch time: 13.68min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_14.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_14.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_21.pkl
best train: 0.85095
best train val: 0.7406
best test: 0.6129
epoch 22 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8527 loss: 0.3591562378388204
train val: 0.7396 loss: 0.4848551947860047
test: 0.6098 loss: 0.6824743772029876
epoch time: 13.80min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_16.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_16.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_21.pkl
best train: 0.85095
best train val: 0.7406
best test: 0.6129
epoch 23 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.84675 loss: 0.3235592260357171
train val: 0.7352 loss: 0.49281299302978443
test: 0.5912 loss: 0.7220216275990009
epoch time: 14.07min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_15.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_15.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_21.pkl
best train: 0.85095
best train val: 0.7406
best test: 0.6129
epoch 24 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.86085 loss: 0.2855711382955538
train val: 0.7401 loss: 0.47013885506542863
test: 0.6019 loss: 0.6613961251437664
epoch time: 13.66min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_19.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_19.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_21.pkl
best train: 0.85095
best train val: 0.7406
best test: 0.6129
epoch 25 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8613 loss: 0.2891033058916529
train val: 0.747 loss: 0.5092574126703665
test: 0.6089 loss: 0.6960500382840633
epoch time: 13.33min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_20.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_20.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_25.pkl
best train: 0.8613
best train val: 0.747
best test: 0.6089
epoch 26 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.84485 loss: 1.7154595810871502
train val: 0.7482 loss: 0.4683800053209241
test: 0.597 loss: 0.6755780146956444
epoch time: 13.31min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_23.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_23.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_26.pkl
best train: 0.84485
best train val: 0.7482
best test: 0.597
epoch 27 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.86105 loss: 0.28024927469551875
train val: 0.7511 loss: 0.46480224946480303
test: 0.6041 loss: 0.672553276014328
epoch time: 13.85min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_18.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_18.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_27.pkl
best train: 0.86105
best train val: 0.7511
best test: 0.6041
epoch 28 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.86205 loss: 0.2749132668401202
train val: 0.7438 loss: 0.47114133361568095
test: 0.5964 loss: 0.6830610546827316
epoch time: 13.67min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_22.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_22.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_27.pkl
best train: 0.86105
best train val: 0.7511
best test: 0.6041
epoch 29 version: 0


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.86815 loss: 0.2677014533635284
train val: 0.7478 loss: 0.46723010782357305
test: 0.6085 loss: 0.6893656387925148
epoch time: 13.75min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v0_lg_24.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_24.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v0_lg_27.pkl
best train: 0.86105
best train val: 0.7511
best test: 0.6041


test val:   0%|          | 0/10000 [00:00<?, ?it/s]

test val: {'accuracy': 0.612} loss: 0.6797253951728344
correct


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
0,!copy--valid;(mtl:20);(sw);(st),Gonna ride the baloney pony all the way to Was...,I'd literally take the law into my own hands.,I'd literally take the law into my own hands.,Gonna ride the baloney pony all the way to Was...,4.662937e-15,1.0,True,0.435376,0.564624,1,1
3,random--valid;(sw);(st),I always brush my teeth but need to floss more...,As an american all I can say is: why?,[removed],User profile checks out. Bet you're on a list ...,0.1192029,0.8807971,True,0.005049,0.994951,1,1
0,*valid--random;(sw);(st),And they taste delicious when chopped into hal...,"In Spain/Portugal we also cook them ""a feira"" ...",Seinfeld seems to be the one that paved the wa...,Wiping down gym equipment.,0.9933071,0.006692851,False,0.712754,0.287246,0,0
1,*valid--!copy;(mtl:20);(sw);(st),Yeah that's a thing. I spend my summer vacatio...,you ever done on going maintenance on a boat? ...,you ever done on going maintenance on a boat? ...,Yeah that's a thing. I spend my summer vacatio...,1.0,1.5628820000000001e-18,False,0.765095,0.234905,0,0
2,*valid--!copy;(sw);(st),She actually has a song where this is the poin...,"Yep, this one came to mind. She is incredible ...","Yep, this one came to mind. She is incredible ...",She actually has a song where this is the poin...,0.9975274,0.002472623,False,0.760728,0.239272,0,0


incorrect


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
1,*valid--!copy;(mtl:20);(sw);(st),Men are still plenty selective about who they ...,"Men are selective, as in, they'll select from ...","Men are selective, as in, they'll select from ...",Men are still plenty selective about who they ...,0.999877,0.000123,False,0.346787,0.653213,1,0
2,random--valid;(sw);(st),Dungeons and Dragons,Did you see this post a week ago and got jealo...,"Question for the men, what's the worst part of...",When sitting wrong on your bike seat and cutti...,0.119203,0.880797,True,0.661878,0.338122,0,1
0,random--valid;(mtl:20);(sw);(st),So that's where it came from albeit it's still...,Yeah that's pretty much what he did. Epstein h...,I work in a kitchen full of men and it's quite...,Tears are for the French,0.000911,0.999089,True,0.513867,0.486133,0,1
1,random--valid;(mtl:20);(sw);(st),I am uncomfortably comfortable not trying.,They're slightly better than nickleback. Haha,"US uses 120 VAC (RMS), so the wattage is half ...",Interesting!! I came into this not knowing muc...,0.119203,0.880797,True,0.715443,0.284557,0,1
2,*valid--!copy;(sw);(st),I called the number at the end of his addictio...,Right. He's the real deal. He will answer his ...,Right. He's the real deal. He will answer his ...,I called the number at the end of his addictio...,1.0,0.0,False,0.377781,0.622219,1,0


array([[-0.2504432 ,  0.26919276,  0.25542802, ...,  0.19207422,
         0.05956524, -0.0959368 ],
       [-0.2504815 ,  0.27079436,  0.2581747 , ...,  0.18608788,
         0.05706534, -0.09215498],
       [-0.2504617 ,  0.2698929 ,  0.25662425, ...,  0.18947943,
         0.05848274, -0.09429861],
       [-0.25049344,  0.2715137 ,  0.25942034, ...,  0.18333705,
         0.05591338, -0.090414  ]], dtype=float32)

array([[-0.25047064,  0.27027607,  0.2572818 , ...,  0.18804526,
         0.05788374, -0.09339242],
       [-0.25047523,  0.27048683,  0.25764447, ...,  0.1872518 ,
         0.05755213, -0.09289098],
       [-0.25048962,  0.27126357,  0.2589863 , ...,  0.18429826,
         0.05631619, -0.09102268],
       [-0.25045753,  0.26972583,  0.25633833, ...,  0.1901015 ,
         0.05874242, -0.09469149]], dtype=float32)

array([[-0.25049362,  0.27152923,  0.25944757, ...,  0.1832769 ,
         0.0558882 , -0.09037599],
       [-0.250404  ,  0.26791093,  0.25325304, ...,  0.19674474,
         0.06150976, -0.09888262],
       [-0.25046986,  0.27024305,  0.25722528, ...,  0.18816887,
         0.05793539, -0.0934706 ],
       [-0.25045317,  0.26955962,  0.2560539 , ...,  0.1907189 ,
         0.05900013, -0.0950814 ]], dtype=float32)

array([[-0.25045547,  0.2696493 ,  0.2562074 , ...,  0.19038591,
         0.05886114, -0.09487112],
       [-0.2504778 ,  0.2706087 ,  0.2578545 , ...,  0.18679121,
         0.0573597 , -0.09259986],
       [-0.25050092,  0.27218696,  0.26059476, ...,  0.18072118,
         0.05481599, -0.08875631],
       [-0.2504704 ,  0.2702645 ,  0.2572621 , ...,  0.1880885 ,
         0.05790183, -0.09341978]], dtype=float32)

array([[-0.2503862 ,  0.26738137,  0.25235963, ...,  0.19864725,
         0.06230053, -0.10008163],
       [-0.25039077,  0.26751366,  0.25258264, ...,  0.19817339,
         0.06210356, -0.09978299],
       [-0.25046727,  0.27012733,  0.25702637, ...,  0.1886033 ,
         0.05811691, -0.09374516],
       [-0.25043592,  0.26893696,  0.25499257, ...,  0.19301385,
         0.05995691, -0.09652983]], dtype=float32)

array([[-0.25043687,  0.26897165,  0.25505173, ...,  0.19288637,
         0.05990376, -0.09644944],
       [-0.25039795,  0.26772612,  0.25294092, ...,  0.19741045,
         0.06178648, -0.09930222],
       [-0.25045383,  0.26958495,  0.25609693, ...,  0.19062567,
         0.05896135, -0.09502292],
       [-0.25044852,  0.26938704,  0.25575933, ...,  0.1913574 ,
         0.05926645, -0.09548447]], dtype=float32)

array([[-0.25043103,  0.26877353,  0.2547149 , ...,  0.19361201,
         0.06020606, -0.09690728],
       [-0.25047314,  0.27038756,  0.2574734 , ...,  0.1876261 ,
         0.05770862, -0.09312756],
       [-0.25048196,  0.2708212 ,  0.25822115, ...,  0.18598558,
         0.05702266, -0.09209041],
       [-0.25042763,  0.26865885,  0.25452003, ...,  0.19403128,
         0.06038073, -0.09717172]], dtype=float32)

array([[-0.2504797 ,  0.2707029 ,  0.25801662, ...,  0.1864342 ,
         0.05721049, -0.0923745 ],
       [-0.25044444,  0.26923636,  0.2555021 , ...,  0.19191384,
         0.05949843, -0.09583563],
       [-0.25043994,  0.26907888,  0.25523406, ...,  0.19249283,
         0.05973979, -0.09620109],
       [-0.2504905 ,  0.2713167 ,  0.25907856, ...,  0.18409449,
         0.05623079, -0.09089358]], dtype=float32)

array([[-0.2504565 ,  0.2696903 ,  0.25627694, ...,  0.19023363,
         0.05879777, -0.09477506],
       [-0.25044593,  0.26929295,  0.25559872, ...,  0.19170508,
         0.05941139, -0.09570391],
       [-0.2504642 ,  0.26999548,  0.25680014, ...,  0.18909653,
         0.0583228 , -0.09405667],
       [-0.25046283,  0.26994082,  0.25670645, ...,  0.18930066,
         0.05840812, -0.09418573]], dtype=float32)

array([[-0.2504599 ,  0.26982254,  0.2565038 , ...,  0.1897418 ,
         0.05859233, -0.09446436],
       [-0.25044852,  0.26938686,  0.255759  , ...,  0.1913581 ,
         0.05926671, -0.0954849 ],
       [-0.2504683 ,  0.2701725 ,  0.25710398, ...,  0.18843375,
         0.05804602, -0.093638  ],
       [-0.25047782,  0.2706095 ,  0.2578558 , ...,  0.18678829,
         0.05735826, -0.0925978 ]], dtype=float32)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- T

None 0
epoch 0 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7132 loss: 0.5017334903860697
train val: 0.7246 loss: 0.4846846006604377
test: 0.6305 loss: 0.6252342349648475
epoch time: 12.14min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_0.pkl
best train: 0.7132
best train val: 0.7246
best test: 0.6305
epoch 1 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.77765 loss: 0.42867201781437037
train val: 0.7201 loss: 0.49862171598263083
test: 0.6293 loss: 0.6392749866127968
epoch time: 12.31min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_0.pkl
best train: 0.7132
best train val: 0.7246
best test: 0.6305
epoch 2 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7915 loss: 0.41096946440601023
train val: 0.7226 loss: 0.5112191793816164
test: 0.5878 loss: 0.7020967981636524
epoch time: 12.02min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_0.pkl
best train: 0.7132
best train val: 0.7246
best test: 0.6305
epoch 3 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7963 loss: 0.3917966575597064
train val: 0.7087 loss: 0.5068314800444059
test: 0.6296 loss: 0.6343447849214077
epoch time: 11.73min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_0.pkl
best train: 0.7132
best train val: 0.7246
best test: 0.6305
epoch 4 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7967 loss: 0.38790015840873093
train val: 0.7404 loss: 0.4756645157625899
test: 0.6238 loss: 0.6404555241942406
epoch time: 11.82min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 5 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8279 loss: 0.34107063434466833
train val: 0.7078 loss: 0.5287012936805506
test: 0.6227 loss: 0.6568252017617225
epoch time: 12.75min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 6 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.84185 loss: 0.31689761902048413
train val: 0.6997 loss: 0.5401779724892461
test: 0.5899 loss: 0.6952695739090443
epoch time: 11.92min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_5.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_5.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 7 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8413 loss: 0.3143098585572157
train val: 0.7183 loss: 0.5054096812322736
test: 0.6065 loss: 0.6647814108908177
epoch time: 12.61min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_6.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_6.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 8 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.85165 loss: 0.2955857342290706
train val: 0.7146 loss: 0.49476167361987755
test: 0.5672 loss: 0.677502811217308
epoch time: 12.18min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_3.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_3.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 9 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83785 loss: 0.3269555242642882
train val: 0.6885 loss: 0.5231861948633566
test: 0.5911 loss: 0.678696991366148
epoch time: 11.71min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_8.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_8.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 10 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7859 loss: 0.39266161631591656
train val: 0.6426 loss: 0.6751437024593353
test: 0.5997 loss: 0.680644779920578
epoch time: 11.76min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_9.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_9.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 11 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.78225 loss: 0.40858261410455454
train val: 0.7072 loss: 0.5126781366297976
test: 0.6184 loss: 0.6603403116494417
epoch time: 12.34min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_10.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_10.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 12 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.816 loss: 0.3378374571927474
train val: 0.698 loss: 0.5271102300822735
test: 0.6009 loss: 0.6679642030537128
epoch time: 12.47min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_11.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_11.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 13 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7995 loss: 0.3730807661987143
train val: 0.6973 loss: 0.5118417954889126
test: 0.5804 loss: 0.674078323417902
epoch time: 11.84min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_12.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_12.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 14 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8008 loss: 0.3582791952083917
train val: 0.7158 loss: 0.49544791013267825
test: 0.5973 loss: 0.6682889256238937
epoch time: 11.80min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_13.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_13.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 15 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.79565 loss: 0.3673930954673018
train val: 0.7148 loss: 0.5299848202777961
test: 0.5999 loss: 0.6766994301557541
epoch time: 12.36min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_14.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_14.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 16 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6385 loss: 27.030408537567006
train val: 0.3436 loss: 1.050881676888466
test: 0.496 loss: 1.5369874935269356
epoch time: 12.49min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_15.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_15.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 17 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.4973 loss: 50.194410585306315
train val: 0.5038 loss: 0.746741083574295
test: 0.521 loss: 0.7223520092487336
epoch time: 12.38min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_16.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_16.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 18 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50155 loss: 49.81733717427254
train val: 0.3892 loss: 1.915625043272972
test: 0.4657 loss: 1.6232771537706256
epoch time: 12.48min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_17.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_17.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 19 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50025 loss: 49.944751196074264
train val: 0.662 loss: 10.37769546270267
test: 0.5624 loss: 18.142325737375735
epoch time: 12.47min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_18.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_18.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 20 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.4992 loss: 50.07208055458069
train val: 0.3267 loss: 24.24533764414809
test: 0.4358 loss: 28.339827883603242
epoch time: 12.34min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_19.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_19.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 21 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.52225 loss: 47.74694011605978
train val: 0.6429 loss: 12.259224948472832
test: 0.5623 loss: 22.6667450896178
epoch time: 11.58min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_20.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_20.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 22 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5332 loss: 46.669964304852485
train val: 0.6582 loss: 11.289478110607627
test: 0.5568 loss: 20.955925363391817
epoch time: 12.59min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_21.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_21.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 23 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50995 loss: 48.988715807724
train val: 0.3464 loss: 25.065784277103173
test: 0.4745 loss: 21.690325876494036
epoch time: 12.13min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_22.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_22.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 24 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5026 loss: 49.735440678787235
train val: 0.3593 loss: 28.24382880662794
test: 0.4927 loss: 23.16735185020277
epoch time: 12.09min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_23.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_23.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 25 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5037 loss: 49.62121589508057
train val: 0.523 loss: 25.317519090477322
test: 0.4787 loss: 26.984557366690378
epoch time: 12.28min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_24.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_24.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 26 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.4995 loss: 50.048625
train val: 0.4695 loss: 33.73699609097387
test: 0.4789 loss: 32.34724007236868
epoch time: 12.30min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_25.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_25.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 27 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.4914 loss: 50.848805027008055
train val: 0.6735 loss: 10.180343389879466
test: 0.5533 loss: 15.884017224394816
epoch time: 12.64min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_26.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_26.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 28 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.51505 loss: 48.49062222595215
train val: 0.5584 loss: 43.48087832421981
test: 0.436 loss: 56.285347221338746
epoch time: 12.33min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_27.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_27.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238
epoch 29 version: 1


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50105 loss: 49.8879390625
train val: 0.3741 loss: 8.84468714413643
test: 0.4749 loss: 2.147155709424242
epoch time: 12.43min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v1_lg_28.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_28.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v1_lg_4.pkl
best train: 0.7967
best train val: 0.7404
best test: 0.6238


test val:   0%|          | 0/10000 [00:00<?, ?it/s]

test val: {'accuracy': 0.4764} loss: 2.130697231856617
correct


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
1,*valid--!copy;(mtl:20);(sw);(st),Men are still plenty selective about who they ...,"Men are selective, as in, they'll select from ...","Men are selective, as in, they'll select from ...",Men are still plenty selective about who they ...,0.999877,0.000123,False,0.991422,0.008577,0,0
2,random--valid;(sw);(st),Dungeons and Dragons,Did you see this post a week ago and got jealo...,"Question for the men, what's the worst part of...",When sitting wrong on your bike seat and cutti...,0.119203,0.880797,True,0.437824,0.562177,1,1
3,random--valid;(sw);(st),I always brush my teeth but need to floss more...,As an american all I can say is: why?,[removed],User profile checks out. Bet you're on a list ...,0.119203,0.880797,True,0.377541,0.622459,1,1
3,*valid--random;(mtl:20);(sw);(st),The cigarette industry social lied about cigar...,covid vaccines can protect from covid,“Women get sex so easy!” “Man I bet no one wan...,You guys watch porn with sound on??? What a pr...,0.880797,0.119203,False,0.998073,0.001927,0,0
2,*valid--random;(sw);(st),$5 and I'll never tell him i won as I wouldn't...,Sounds like a plot to a movie lol,"The last iteration of them yeah, just thanks t...",Don't throw away your money. When you get paid...,0.880797,0.119203,False,0.731059,0.268941,0,0


incorrect


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
0,!copy--valid;(mtl:20);(sw);(st),Gonna ride the baloney pony all the way to Was...,I'd literally take the law into my own hands.,I'd literally take the law into my own hands.,Gonna ride the baloney pony all the way to Was...,4.662937e-15,1.0,True,0.5,0.5,0,1
0,*valid--random;(sw);(st),And they taste delicious when chopped into hal...,"In Spain/Portugal we also cook them ""a feira"" ...",Seinfeld seems to be the one that paved the wa...,Wiping down gym equipment.,0.9933071,0.006692851,False,0.001501,0.998499,1,0
1,*valid--!copy;(mtl:20);(sw);(st),Yeah that's a thing. I spend my summer vacatio...,you ever done on going maintenance on a boat? ...,you ever done on going maintenance on a boat? ...,Yeah that's a thing. I spend my summer vacatio...,1.0,1.5628820000000001e-18,False,0.022977,0.977023,1,0
2,*valid--!copy;(sw);(st),She actually has a song where this is the poin...,"Yep, this one came to mind. She is incredible ...","Yep, this one came to mind. She is incredible ...",She actually has a song where this is the poin...,0.9975274,0.002472623,False,0.268941,0.731059,1,0
0,random--valid;(mtl:20);(sw);(st),So that's where it came from albeit it's still...,Yeah that's pretty much what he did. Epstein h...,I work in a kitchen full of men and it's quite...,Tears are for the French,0.0009110512,0.9990889,True,0.977023,0.022977,0,1


array([[-35.501076, -38.933743, -30.15163 , ...,  40.259182,  37.36583 ,
         39.754585],
       [-35.50106 , -38.933727, -30.151619, ...,  40.259163,  37.365814,
         39.75457 ],
       [-35.501072, -38.93374 , -30.151628, ...,  40.259174,  37.365826,
         39.75458 ],
       [-35.50107 , -38.933735, -30.151627, ...,  40.25917 ,  37.36582 ,
         39.754578]], dtype=float32)

array([[-35.501072, -38.933743, -30.151628, ...,  40.25918 ,  37.365826,
         39.754585],
       [-35.501072, -38.933735, -30.151628, ...,  40.259174,  37.365826,
         39.754578],
       [-35.501072, -38.93374 , -30.151628, ...,  40.259174,  37.365826,
         39.75458 ],
       [-35.501072, -38.93374 , -30.151628, ...,  40.259174,  37.365826,
         39.75458 ]], dtype=float32)

array([[-35.501072, -38.933735, -30.151628, ...,  40.259174,  37.365826,
         39.754578],
       [-35.501072, -38.933735, -30.151628, ...,  40.259174,  37.365826,
         39.754578],
       [-35.50107 , -38.933735, -30.151628, ...,  40.25917 ,  37.36582 ,
         39.754578],
       [-35.50107 , -38.933735, -30.151628, ...,  40.259174,  37.365826,
         39.75458 ]], dtype=float32)

array([[-35.501076, -38.933743, -30.15163 , ...,  40.25918 ,  37.36583 ,
         39.754585],
       [-35.501045, -38.933712, -30.15161 , ...,  40.259148,  37.365803,
         39.754555],
       [-35.50107 , -38.933735, -30.151627, ...,  40.25917 ,  37.36582 ,
         39.754578],
       [-35.501072, -38.933735, -30.151628, ...,  40.259174,  37.365826,
         39.75458 ]], dtype=float32)

array([[-35.50106 , -38.93373 , -30.151623, ...,  40.259167,  37.365814,
         39.75457 ],
       [-35.501076, -38.93374 , -30.15163 , ...,  40.259174,  37.36583 ,
         39.754585],
       [-35.501076, -38.933743, -30.151634, ...,  40.259182,  37.36583 ,
         39.754585],
       [-35.501076, -38.933743, -30.151634, ...,  40.259182,  37.36583 ,
         39.754585]], dtype=float32)

array([[-35.501076, -38.93374 , -30.15163 , ...,  40.25918 ,  37.365826,
         39.754585],
       [-35.501076, -38.933743, -30.15163 , ...,  40.25918 ,  37.36583 ,
         39.754585],
       [-35.501007, -38.93367 , -30.151577, ...,  40.2591  ,  37.365765,
         39.754513],
       [-35.501076, -38.933743, -30.151634, ...,  40.25918 ,  37.36583 ,
         39.754585]], dtype=float32)

array([[-35.50106 , -38.933727, -30.15162 , ...,  40.259163,  37.365814,
         39.75457 ],
       [-35.50107 , -38.933735, -30.151627, ...,  40.25917 ,  37.365826,
         39.754578],
       [-35.501053, -38.933716, -30.151611, ...,  40.25915 ,  37.365807,
         39.754562],
       [-35.501076, -38.933743, -30.151634, ...,  40.25918 ,  37.36583 ,
         39.754585]], dtype=float32)

array([[-35.50106 , -38.933727, -30.151619, ...,  40.259163,  37.365814,
         39.75457 ],
       [-35.501076, -38.933743, -30.15163 , ...,  40.25918 ,  37.36583 ,
         39.754585],
       [-35.501076, -38.933743, -30.151634, ...,  40.25918 ,  37.36583 ,
         39.754585],
       [-35.501076, -38.933743, -30.151634, ...,  40.259182,  37.36583 ,
         39.754585]], dtype=float32)

array([[-35.501015, -38.933678, -30.15158 , ...,  40.259106,  37.36577 ,
         39.75452 ],
       [-35.501076, -38.93374 , -30.151628, ...,  40.25918 ,  37.36583 ,
         39.754585],
       [-35.50107 , -38.933735, -30.151628, ...,  40.25917 ,  37.365826,
         39.754578],
       [-35.50107 , -38.933735, -30.151628, ...,  40.259174,  37.365826,
         39.754578]], dtype=float32)

array([[-35.501076, -38.933743, -30.15163 , ...,  40.25918 ,  37.365826,
         39.754585],
       [-35.501076, -38.933743, -30.15163 , ...,  40.25918 ,  37.365826,
         39.754585],
       [-35.501072, -38.933735, -30.151628, ...,  40.259174,  37.365826,
         39.754578],
       [-35.501076, -38.93374 , -30.151628, ...,  40.25918 ,  37.36583 ,
         39.75458 ]], dtype=float32)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- T

None 0
epoch 0 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7382 loss: 0.5436004432678223
train val: 0.7219 loss: 0.5373253197908402
test: 0.6437 loss: 0.6466006888747216
epoch time: 12.16min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 1 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.77575 loss: 0.4879291292309761
train val: 0.7181 loss: 0.5370447759866714
test: 0.5996 loss: 0.65916873036623
epoch time: 12.33min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 2 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.78305 loss: 0.47299463711977
train val: 0.6743 loss: 0.5451672175824642
test: 0.5279 loss: 0.6726683840751648
epoch time: 12.19min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 3 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7771 loss: 0.46247362550199034
train val: 0.6916 loss: 0.554022440713644
test: 0.5806 loss: 0.6761620312452317
epoch time: 11.89min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 4 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.75365 loss: 0.4799625008225441
train val: 0.6531 loss: 0.5686483474910259
test: 0.502 loss: 0.682471077299118
epoch time: 12.32min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 5 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7677 loss: 0.44780092733502386
train val: 0.6621 loss: 0.5482684583961963
test: 0.5419 loss: 0.676467588531971
epoch time: 12.49min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 6 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.72785 loss: 0.5050181359857321
train val: 0.6269 loss: 0.5664390466213226
test: 0.5066 loss: 0.6991027557849884
epoch time: 12.41min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_4.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_4.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 7 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.74825 loss: 0.4825803387731314
train val: 0.6624 loss: 0.5648609751880169
test: 0.5442 loss: 0.6939609698772431
epoch time: 12.47min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_6.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_6.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 8 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.721 loss: 0.5010376846224069
train val: 0.6379 loss: 0.5682460744261741
test: 0.5112 loss: 0.6818918744921685
epoch time: 12.16min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_5.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_5.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 9 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.74375 loss: 0.476468591606617
train val: 0.6319 loss: 0.556985398465395
test: 0.5384 loss: 0.6767604033946991
epoch time: 12.46min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_8.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_8.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 10 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.71405 loss: 0.5264103473514319
train val: 0.646 loss: 0.600843989121914
test: 0.5507 loss: 0.6919250538349152
epoch time: 12.49min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_9.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_9.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 11 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.73615 loss: 0.4970443810015917
train val: 0.6362 loss: 0.6158906040668487
test: 0.5404 loss: 0.7157549296855926
epoch time: 12.07min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_10.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_10.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 12 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.73605 loss: 0.4852493601769209
train val: 0.6316 loss: 0.5621234013736248
test: 0.5102 loss: 0.6946030510425567
epoch time: 12.03min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_11.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_11.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 13 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.71 loss: 0.5411568253099919
train val: 0.6672 loss: 0.5711875532388687
test: 0.5403 loss: 0.6978048977971077
epoch time: 12.18min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_12.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_12.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 14 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6893 loss: 0.553735734051466
train val: 0.6381 loss: 0.6145506540060043
test: 0.496 loss: 0.7237619694590569
epoch time: 12.24min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_7.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_7.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 15 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.68445 loss: 0.5621690855115652
train val: 0.6618 loss: 0.5948223791122437
test: 0.5388 loss: 0.7124242303788662
epoch time: 12.51min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_14.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_14.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 16 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6664 loss: 0.5518452081650496
train val: 0.6476 loss: 0.5717746386408806
test: 0.5586 loss: 0.6970139467477798
epoch time: 11.87min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_15.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_15.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 17 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.70355 loss: 0.5424724908173084
train val: 0.6623 loss: 0.5956092210650444
test: 0.5647 loss: 0.7070258922219277
epoch time: 12.52min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_16.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_16.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 18 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.72 loss: 0.5346715016037226
train val: 0.6609 loss: 0.5765202501952649
test: 0.5314 loss: 0.7025192899823188
epoch time: 12.16min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_17.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_17.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 19 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.72485 loss: 0.5216017502844333
train val: 0.6671 loss: 0.5780805082082748
test: 0.532 loss: 0.7117546213388443
epoch time: 12.07min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_18.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_18.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 20 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.72425 loss: 0.5179894398450852
train val: 0.6062 loss: 0.6023272743463516
test: 0.5008 loss: 0.730035565173626
epoch time: 12.44min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_19.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_19.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 21 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.674 loss: 0.5505021845072507
train val: 0.6234 loss: 0.6887335543632507
test: 0.5189 loss: 0.6870060274362564
epoch time: 12.31min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_20.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_20.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 22 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6258 loss: 0.6275800784528256
train val: 0.665 loss: 0.5798922597289086
test: 0.5323 loss: 0.7176657816648483
epoch time: 12.13min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_21.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_21.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 23 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7041 loss: 0.521009984651208
train val: 0.6581 loss: 0.6115516387760639
test: 0.5403 loss: 0.739108122164011
epoch time: 12.30min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_22.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_22.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 24 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.69075 loss: 0.5376470417052508
train val: 0.6375 loss: 0.573294308423996
test: 0.5424 loss: 0.7036630336761475
epoch time: 12.05min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_23.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_23.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 25 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7173 loss: 0.5006520924389363
train val: 0.6684 loss: 0.5739845311641694
test: 0.5567 loss: 0.7050605735898018
epoch time: 12.41min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_24.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_24.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 26 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.70925 loss: 0.5127677287817002
train val: 0.4935 loss: 0.6791227298974991
test: 0.492 loss: 0.7062642375349999
epoch time: 12.25min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_13.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_13.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 27 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6626 loss: 0.5905535309731961
train val: 0.6255 loss: 0.6275408550024033
test: 0.4795 loss: 0.8362869766831398
epoch time: 12.06min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_26.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_26.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 28 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7042 loss: 0.5375048423677683
train val: 0.6644 loss: 0.5784277902841568
test: 0.4971 loss: 0.7197438518762589
epoch time: 12.17min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_27.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_27.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437
epoch 29 version: 2


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.72485 loss: 0.5113693304032088
train val: 0.6446 loss: 0.5867487464129925
test: 0.5313 loss: 0.7140842463970184
epoch time: 12.28min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v2_lg_28.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_28.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v2_lg_0.pkl
best train: 0.7382
best train val: 0.7219
best test: 0.6437


test val:   0%|          | 0/10000 [00:00<?, ?it/s]

test val: {'accuracy': 0.5348} loss: 0.7052360381543636
correct


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
0,!copy--valid;(mtl:20);(sw);(st),Gonna ride the baloney pony all the way to Was...,I'd literally take the law into my own hands.,I'd literally take the law into my own hands.,Gonna ride the baloney pony all the way to Was...,4.662937e-15,1.0,True,0.499946,0.500054,1,1
1,*valid--!copy;(mtl:20);(sw);(st),Men are still plenty selective about who they ...,"Men are selective, as in, they'll select from ...","Men are selective, as in, they'll select from ...",Men are still plenty selective about who they ...,0.9998766,0.000123,False,0.529094,0.470906,0,0
2,random--valid;(sw);(st),Dungeons and Dragons,Did you see this post a week ago and got jealo...,"Question for the men, what's the worst part of...",When sitting wrong on your bike seat and cutti...,0.1192029,0.880797,True,0.499985,0.500015,1,1
3,random--valid;(sw);(st),I always brush my teeth but need to floss more...,As an american all I can say is: why?,[removed],User profile checks out. Bet you're on a list ...,0.1192029,0.880797,True,0.11982,0.88018,1,1
0,*valid--random;(sw);(st),And they taste delicious when chopped into hal...,"In Spain/Portugal we also cook them ""a feira"" ...",Seinfeld seems to be the one that paved the wa...,Wiping down gym equipment.,0.9933071,0.006693,False,0.734537,0.265463,0,0


incorrect


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
2,*valid--!copy;(sw);(st),She actually has a song where this is the poin...,"Yep, this one came to mind. She is incredible ...","Yep, this one came to mind. She is incredible ...",She actually has a song where this is the poin...,0.997527,0.002473,False,0.430999,0.569001,1,0
0,random--valid;(mtl:20);(sw);(st),So that's where it came from albeit it's still...,Yeah that's pretty much what he did. Epstein h...,I work in a kitchen full of men and it's quite...,Tears are for the French,0.000911,0.999089,True,0.500104,0.499896,0,1
2,*valid--random;(sw);(st),$5 and I'll never tell him i won as I wouldn't...,Sounds like a plot to a movie lol,"The last iteration of them yeah, just thanks t...",Don't throw away your money. When you get paid...,0.880797,0.119203,False,0.499969,0.500031,1,0
3,*valid--!copy;(mtl:20);(sw);(st),You can tell I used to be pretty 10 years ago,Felt this in my soul,Felt this in my soul,You can tell I used to be pretty 10 years ago,0.993307,0.006693,False,0.499932,0.500068,1,0
2,*valid--random;(mtl:20);(sw);(st),“Repetitive discography” but you don’t even kn...,Are you not comprehending that *I'm* not the o...,Best kept as a fantasy.,*chumbawumba pumping in the background* ROUND 2,0.880797,0.119203,False,0.499981,0.500019,1,0


array([[-0.10982576,  0.13541806, -0.3482886 , ...,  0.25888145,
         0.27427363,  0.47009835],
       [-0.1098257 ,  0.13541807, -0.34828863, ...,  0.25888148,
         0.27427363,  0.47009838],
       [-0.10982577,  0.13541804, -0.34828863, ...,  0.25888136,
         0.27427363,  0.47009832],
       [-0.10982579,  0.1354181 , -0.34828866, ...,  0.25888142,
         0.27427366,  0.47009838]], dtype=float32)

array([[-0.10982579,  0.13541813, -0.34828863, ...,  0.25888142,
         0.27427363,  0.47009858],
       [-0.10982574,  0.13541807, -0.34828857, ...,  0.25888142,
         0.27427357,  0.47009847],
       [-0.10982574,  0.13541813, -0.3482885 , ...,  0.2588814 ,
         0.27427357,  0.47009847],
       [-0.10982577,  0.13541812, -0.3482886 , ...,  0.25888136,
         0.27427357,  0.4700985 ]], dtype=float32)

array([[-0.1098258 ,  0.1354181 , -0.3482885 , ...,  0.2588814 ,
         0.2742736 ,  0.47009847],
       [-0.10982576,  0.13541815, -0.3482885 , ...,  0.25888136,
         0.27427357,  0.4700985 ],
       [-0.10982575,  0.13541813, -0.3482885 , ...,  0.25888136,
         0.27427363,  0.47009853],
       [-0.1098258 ,  0.13541812, -0.34828866, ...,  0.25888148,
         0.2742736 ,  0.4700985 ]], dtype=float32)

array([[-0.10982578,  0.13541807, -0.34828866, ...,  0.25888142,
         0.2742736 ,  0.47009838],
       [-0.10982575,  0.13541804, -0.3482885 , ...,  0.25888142,
         0.27427357,  0.47009835],
       [-0.10982575,  0.1354181 , -0.3482885 , ...,  0.25888142,
         0.27427357,  0.47009838],
       [-0.1098258 ,  0.13541807, -0.3482886 , ...,  0.25888142,
         0.27427363,  0.47009838]], dtype=float32)

array([[-0.1098258 ,  0.13541806, -0.34828866, ...,  0.25888145,
         0.27427363,  0.47009838],
       [-0.10982575,  0.13541806, -0.3482886 , ...,  0.25888142,
         0.27427363,  0.47009832],
       [-0.10982575,  0.13541806, -0.34828863, ...,  0.25888136,
         0.27427363,  0.4700983 ],
       [-0.10982577,  0.13541812, -0.34828863, ...,  0.25888133,
         0.2742736 ,  0.4700983 ]], dtype=float32)

array([[-0.10982582,  0.13541807, -0.3482886 , ...,  0.25888148,
         0.27427354,  0.47009838],
       [-0.10982582,  0.13541807, -0.34828863, ...,  0.2588814 ,
         0.27427354,  0.4700984 ],
       [-0.10982575,  0.13541813, -0.34828833, ...,  0.25888124,
         0.27427328,  0.47009805],
       [-0.10982579,  0.1354181 , -0.34828863, ...,  0.25888145,
         0.27427357,  0.4700984 ]], dtype=float32)

array([[-0.10982575,  0.1354181 , -0.34828848, ...,  0.25888145,
         0.27427363,  0.47009838],
       [-0.10982577,  0.1354181 , -0.34828863, ...,  0.25888145,
         0.27427363,  0.47009838],
       [-0.10982579,  0.13541803, -0.34828857, ...,  0.25888148,
         0.27427354,  0.47009838],
       [-0.10982574,  0.13541815, -0.34828857, ...,  0.25888127,
         0.27427354,  0.4700983 ]], dtype=float32)

array([[-0.10982577,  0.13541806, -0.34828866, ...,  0.2588814 ,
         0.2742735 ,  0.4700984 ],
       [-0.10982581,  0.13541806, -0.34828863, ...,  0.25888142,
         0.27427357,  0.47009847],
       [-0.10982582,  0.1354181 , -0.34828866, ...,  0.25888145,
         0.2742736 ,  0.4700984 ],
       [-0.1098258 ,  0.13541812, -0.34828863, ...,  0.25888142,
         0.27427357,  0.47009838]], dtype=float32)

array([[-0.10982617,  0.13541828, -0.34828892, ...,  0.25888133,
         0.27427375,  0.4700983 ],
       [-0.10982577,  0.13541807, -0.34828863, ...,  0.25888148,
         0.2742736 ,  0.47009838],
       [-0.10982577,  0.1354181 , -0.34828863, ...,  0.25888145,
         0.27427354,  0.47009835],
       [-0.10982577,  0.13541807, -0.34828857, ...,  0.25888145,
         0.27427357,  0.4700984 ]], dtype=float32)

array([[-0.10982578,  0.13541812, -0.34828866, ...,  0.25888145,
         0.27427363,  0.4700985 ],
       [-0.10982579,  0.1354181 , -0.34828866, ...,  0.25888145,
         0.27427363,  0.47009856],
       [-0.10982575,  0.13541812, -0.3482885 , ...,  0.25888136,
         0.27427357,  0.47009838],
       [-0.10982579,  0.13541812, -0.34828857, ...,  0.25888142,
         0.2742736 ,  0.4700985 ]], dtype=float32)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- T

None 0
epoch 0 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7033 loss: 0.5617496463954449
train val: 0.6892 loss: 0.5678591298043728
test: 0.6324 loss: 0.6619643114566803
epoch time: 12.22min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 1 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7419 loss: 0.5085124225139618
train val: 0.6764 loss: 0.5534535784065724
test: 0.6031 loss: 0.6642347450375556
epoch time: 12.42min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 2 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.73585 loss: 0.5102201949387789
train val: 0.66 loss: 0.5653609725475311
test: 0.5335 loss: 0.6861743380188942
epoch time: 12.28min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 3 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.64005 loss: 0.5852636605888605
train val: 0.5589 loss: 0.6929148038387298
test: 0.5185 loss: 0.6930917269945145
epoch time: 12.20min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 4 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50915 loss: 0.6916855801939964
train val: 0.6076 loss: 0.6931371712207794
test: 0.4989 loss: 0.6931468685865402
epoch time: 12.24min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 5 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5102 loss: 0.6915565792202949
train val: 0.4412 loss: 0.6931739897251129
test: 0.4479 loss: 0.6931699772119522
epoch time: 12.52min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 6 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5149 loss: 0.6856931548565626
train val: 0.6326 loss: 0.6810694022417069
test: 0.5572 loss: 0.6877330417394638
epoch time: 12.62min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_5.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_5.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 7 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.57815 loss: 0.6606382358938455
train val: 0.6264 loss: 0.693165477347374
test: 0.5127 loss: 0.6931443924427032
epoch time: 12.25min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_3.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_3.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 8 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50255 loss: 0.6930046233057976
train val: 0.4187 loss: 0.6927673726797103
test: 0.5288 loss: 0.685167306804657
epoch time: 12.29min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_4.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_4.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 9 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.51085 loss: 0.691262641108036
train val: 0.4301 loss: 0.6933011065721512
test: 0.4978 loss: 0.6930873106002807
epoch time: 12.63min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_8.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_8.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 10 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.49815 loss: 0.69316381534338
train val: 0.6636 loss: 0.6931356738328933
test: 0.5688 loss: 0.6931432903289795
epoch time: 12.23min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_9.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_9.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 11 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.4966 loss: 0.6931263701915741
train val: 0.4975 loss: 0.6931471840381622
test: 0.5014 loss: 0.6931471832275391
epoch time: 12.38min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_7.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_7.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 12 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.49845 loss: 0.6934263385772705
train val: 0.6239 loss: 0.6931465582847596
test: 0.5858 loss: 0.6931467081785202
epoch time: 12.50min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_11.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_11.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 13 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.52725 loss: 0.6846488631904125
train val: 0.6393 loss: 0.6645096280932427
test: 0.5747 loss: 0.6869184143781663
epoch time: 12.36min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_12.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_12.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 14 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5705 loss: 0.6634780151605606
train val: 0.574 loss: 0.6749198101043701
test: 0.531 loss: 0.7146374702572823
epoch time: 12.59min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_6.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_6.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 15 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5852 loss: 0.6583482306480408
train val: 0.5026 loss: 0.6690360897302627
test: 0.5134 loss: 0.7075913267374039
epoch time: 12.75min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_14.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_14.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 16 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.58185 loss: 0.6599824712514877
train val: 0.5197 loss: 0.6599380514860154
test: 0.4745 loss: 0.7072553661227227
epoch time: 12.36min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_15.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_15.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 17 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5028 loss: 0.6926906289517879
train val: 0.6223 loss: 0.6929803556442261
test: 0.5279 loss: 0.6930851772785187
epoch time: 12.70min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_16.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_16.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 18 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.51055 loss: 0.6924999232113361
train val: 0.6046 loss: 0.6930612421035767
test: 0.5004 loss: 0.6931534148454667
epoch time: 12.33min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_17.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_17.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 19 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5434 loss: 0.6759001689016819
train val: 0.6134 loss: 0.6789492708206176
test: 0.5486 loss: 0.686909495317936
epoch time: 12.45min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_18.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_18.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 20 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5068 loss: 0.6910870630145073
train val: 0.6372 loss: 0.6919001587867737
test: 0.5473 loss: 0.6917337539672852
epoch time: 12.45min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_19.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_19.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 21 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.59535 loss: 0.6650364360630512
train val: 0.6284 loss: 0.6569222520112992
test: 0.5377 loss: 0.6923804758429527
epoch time: 12.38min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_20.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_20.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 22 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.60145 loss: 0.6624089622735977
train val: 0.6348 loss: 0.6567256286859512
test: 0.5343 loss: 0.6946913577795029
epoch time: 12.70min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_21.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_21.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 23 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.55535 loss: 0.6804445593297481
train val: 0.3954 loss: 0.6924059812784195
test: 0.5133 loss: 0.6922453813910484
epoch time: 12.48min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_22.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_22.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 24 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.52795 loss: 0.689588915258646
train val: 0.3752 loss: 0.6937927819490433
test: 0.4661 loss: 0.6947247461557389
epoch time: 12.81min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_23.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_23.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 25 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.54095 loss: 0.6894388364255428
train val: 0.5601 loss: 0.6931441653251648
test: 0.4823 loss: 0.6931466552495956
epoch time: 12.78min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_24.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_24.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 26 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.55155 loss: 0.685929891115427
train val: 0.588 loss: 0.6930867851734162
test: 0.5018 loss: 0.6931084100723267
epoch time: 12.53min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_25.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_25.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 27 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5614 loss: 0.6801022741258145
train val: 0.5858 loss: 0.6924727399587631
test: 0.5308 loss: 0.6919081317424775
epoch time: 12.40min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_26.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_26.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 28 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5958 loss: 0.6656784185826778
train val: 0.6329 loss: 0.6854583766818046
test: 0.5675 loss: 0.7057419133126736
epoch time: 12.38min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_27.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_27.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324
epoch 29 version: 3


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.65625 loss: 0.5985681997001171
train val: 0.654 loss: 0.593029794305563
test: 0.5372 loss: 0.6867613129377365
epoch time: 12.40min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v3_lg_28.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_28.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v3_lg_0.pkl
best train: 0.7033
best train val: 0.6892
best test: 0.6324


test val:   0%|          | 0/10000 [00:00<?, ?it/s]

test val: {'accuracy': 0.5475} loss: 0.6842035049557685
correct


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
1,*valid--!copy;(mtl:20);(sw);(st),Men are still plenty selective about who they ...,"Men are selective, as in, they'll select from ...","Men are selective, as in, they'll select from ...",Men are still plenty selective about who they ...,0.999877,0.0001233946,False,0.540767,0.459233,0,0
2,random--valid;(sw);(st),Dungeons and Dragons,Did you see this post a week ago and got jealo...,"Question for the men, what's the worst part of...",When sitting wrong on your bike seat and cutti...,0.119203,0.8807971,True,0.31953,0.68047,1,1
3,random--valid;(sw);(st),I always brush my teeth but need to floss more...,As an american all I can say is: why?,[removed],User profile checks out. Bet you're on a list ...,0.119203,0.8807971,True,0.237406,0.762594,1,1
0,*valid--random;(sw);(st),And they taste delicious when chopped into hal...,"In Spain/Portugal we also cook them ""a feira"" ...",Seinfeld seems to be the one that paved the wa...,Wiping down gym equipment.,0.993307,0.006692851,False,0.550512,0.449488,0,0
1,*valid--!copy;(mtl:20);(sw);(st),Yeah that's a thing. I spend my summer vacatio...,you ever done on going maintenance on a boat? ...,you ever done on going maintenance on a boat? ...,Yeah that's a thing. I spend my summer vacatio...,1.0,1.5628820000000001e-18,False,0.566271,0.433729,0,0


incorrect


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
0,!copy--valid;(mtl:20);(sw);(st),Gonna ride the baloney pony all the way to Was...,I'd literally take the law into my own hands.,I'd literally take the law into my own hands.,Gonna ride the baloney pony all the way to Was...,4.662937e-15,1.0,True,0.580115,0.419885,0,1
2,*valid--!copy;(sw);(st),She actually has a song where this is the poin...,"Yep, this one came to mind. She is incredible ...","Yep, this one came to mind. She is incredible ...",She actually has a song where this is the poin...,0.9975274,0.002473,False,0.29363,0.70637,1,0
2,*valid--random;(sw);(st),$5 and I'll never tell him i won as I wouldn't...,Sounds like a plot to a movie lol,"The last iteration of them yeah, just thanks t...",Don't throw away your money. When you get paid...,0.8807971,0.119203,False,0.460682,0.539318,1,0
2,*valid--!copy;(sw);(st),I called the number at the end of his addictio...,Right. He's the real deal. He will answer his ...,Right. He's the real deal. He will answer his ...,I called the number at the end of his addictio...,1.0,0.0,False,0.486756,0.513244,1,0
0,random--valid;(mtl:20);(sw);(st),Day-bird aaaAAAAaaaa Fighter of the Night-bird...,Happy endings. Very underrated,You just described my relationship Lol,"That doesn't sound healthy, it's not supposed ...",0.01798621,0.982014,True,0.640024,0.359976,0,1


array([[ 8.666577 ,  4.9840765,  6.3375483, ..., 10.800936 ,  1.2757801,
         1.7305932],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.27578  ,
         1.730593 ],
       [ 8.666577 ,  4.984075 ,  6.337547 , ..., 10.800936 ,  1.27578  ,
         1.7305928],
       [ 8.666577 ,  4.984075 ,  6.337547 , ..., 10.800935 ,  1.27578  ,
         1.7305928]], dtype=float32)

array([[ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.2757802,
         1.7305931],
       [ 8.666577 ,  4.984075 ,  6.3375473, ..., 10.800935 ,  1.27578  ,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.2757802,
         1.7305932],
       [ 8.666577 ,  4.984075 ,  6.3375473, ..., 10.800935 ,  1.2757798,
         1.730593 ]], dtype=float32)

array([[ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800937 ,  1.2757801,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.337547 , ..., 10.800935 ,  1.27578  ,
         1.7305928],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.27578  ,
         1.7305931],
       [ 8.666577 ,  4.9840755,  6.3375473, ..., 10.800936 ,  1.2757798,
         1.7305928]], dtype=float32)

array([[ 8.666577 ,  4.984076 ,  6.3375473, ..., 10.800936 ,  1.2757801,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.27578  ,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.2757798,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375473, ..., 10.800936 ,  1.27578  ,
         1.7305931]], dtype=float32)

array([[ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.2757798,
         1.730593 ],
       [ 8.666577 ,  4.9840755,  6.3375473, ..., 10.800935 ,  1.27578  ,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.27578  ,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.2757798,
         1.7305931]], dtype=float32)

array([[ 8.666577 ,  4.984076 ,  6.3375473, ..., 10.800936 ,  1.27578  ,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800935 ,  1.2757797,
         1.730593 ],
       [ 8.666577 ,  4.984076 ,  6.337547 , ..., 10.800935 ,  1.27578  ,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.337547 , ..., 10.800935 ,  1.2757798,
         1.7305928]], dtype=float32)

array([[ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.27578  ,
         1.7305931],
       [ 8.666577 ,  4.9840765,  6.3375483, ..., 10.800936 ,  1.2757801,
         1.7305932],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.27578  ,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.27578  ,
         1.7305931]], dtype=float32)

array([[ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800937 ,  1.27578  ,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800935 ,  1.27578  ,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.2757801,
         1.7305933],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.2757801,
         1.7305931]], dtype=float32)

array([[ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.2757802,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375473, ..., 10.800936 ,  1.27578  ,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.2757801,
         1.7305928],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.27578  ,
         1.7305931]], dtype=float32)

array([[ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.2757802,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.2757801,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375473, ..., 10.800935 ,  1.2757801,
         1.7305931],
       [ 8.666577 ,  4.984076 ,  6.3375483, ..., 10.800936 ,  1.27578  ,
         1.7305931]], dtype=float32)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- T

None 0
epoch 0 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6473 loss: 0.5979720050871372
train val: 0.6787 loss: 0.575093693369627
test: 0.5759 loss: 0.6906725792646408
epoch time: 12.64min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_0.pkl
best train: 0.6473
best train val: 0.6787
best test: 0.5759
epoch 1 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7213 loss: 0.5294552909135819
train val: 0.6823 loss: 0.553416972476244
test: 0.5997 loss: 0.6605589924693107
epoch time: 12.79min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 2 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6593 loss: 0.5678863708436489
train val: 0.6289 loss: 0.6924168056726455
test: 0.4748 loss: 0.6930315237998962
epoch time: 12.62min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 3 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5696 loss: 0.6696468878030777
train val: 0.5442 loss: 0.6889070762872695
test: 0.4909 loss: 0.6965346777677536
epoch time: 12.78min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 4 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.52475 loss: 0.6854286384940147
train val: 0.5652 loss: 0.6931220393657684
test: 0.4913 loss: 0.6931569410085678
epoch time: 12.56min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 5 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5511 loss: 0.6695182458877563
train val: 0.6396 loss: 0.5817186285912991
test: 0.506 loss: 0.6938764681696892
epoch time: 12.80min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 6 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.62155 loss: 0.6163800644427538
train val: 0.5197 loss: 0.677640681540966
test: 0.4857 loss: 0.7043273192882538
epoch time: 12.76min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_3.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_3.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 7 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5673 loss: 0.6721110775649548
train val: 0.6478 loss: 0.6347744095683098
test: 0.5387 loss: 0.7220456448793411
epoch time: 12.45min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_6.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_6.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 8 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.62775 loss: 0.6175937473803759
train val: 0.6549 loss: 0.5710406693935395
test: 0.5316 loss: 0.6884492003202438
epoch time: 12.63min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_4.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_4.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 9 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6745 loss: 0.5749760392427444
train val: 0.6123 loss: 0.6042520640552044
test: 0.5169 loss: 0.7074414461016655
epoch time: 12.53min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_2.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_2.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 10 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.61345 loss: 0.6341280465304852
train val: 0.439 loss: 0.6886432605743408
test: 0.4892 loss: 0.693917150425911
epoch time: 12.42min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_9.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_9.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 11 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.57825 loss: 0.6635438888758421
train val: 0.601 loss: 0.6929783692121506
test: 0.5315 loss: 0.69292891933918
epoch time: 12.79min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_10.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_10.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 12 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.56905 loss: 0.6654649960964918
train val: 0.617 loss: 0.6070077902972698
test: 0.5147 loss: 0.7037773508429527
epoch time: 12.65min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_11.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_11.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 13 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6018 loss: 0.5909450045377016
train val: 0.6106 loss: 0.5732471791267395
test: 0.5416 loss: 0.6771932997941971
epoch time: 12.69min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_12.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_12.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 14 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.59815 loss: 0.597304334077239
train val: 0.6184 loss: 0.5971737518250942
test: 0.5064 loss: 0.6851110226750374
epoch time: 12.79min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_13.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_13.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 15 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.59585 loss: 0.603380867883563
train val: 0.6341 loss: 0.5889571899652482
test: 0.514 loss: 0.6869030817508698
epoch time: 12.84min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_14.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_14.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 16 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5994 loss: 0.5967513921290636
train val: 0.5768 loss: 0.577267360842228
test: 0.4891 loss: 0.6849916202545167
epoch time: 12.49min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_15.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_15.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 17 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6131 loss: 0.5971927326887846
train val: 0.6104 loss: 0.6514701115429401
test: 0.5157 loss: 0.7342007438838482
epoch time: 12.59min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_16.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_16.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 18 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.54225 loss: 0.6836750732004643
train val: 0.4891 loss: 0.692934410238266
test: 0.4883 loss: 0.693694920039177
epoch time: 12.36min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_17.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_17.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 19 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.51605 loss: 0.6915618938922882
train val: 0.451 loss: 0.6931702060699463
test: 0.5007 loss: 0.6932228076457977
epoch time: 12.73min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_18.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_18.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 20 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5373 loss: 0.6887928634524345
train val: 0.5858 loss: 0.6930787186145783
test: 0.5217 loss: 0.6930987491130829
epoch time: 12.64min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_19.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_19.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 21 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.53705 loss: 0.6896456265866756
train val: 0.509 loss: 0.6931553163528442
test: 0.5143 loss: 0.6952776779890061
epoch time: 12.44min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_20.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_20.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 22 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5736 loss: 0.6579242166757584
train val: 0.635 loss: 0.6006361843883992
test: 0.5064 loss: 0.6923291197061539
epoch time: 12.50min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_21.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_21.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 23 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.63475 loss: 0.5888750919282436
train val: 0.6262 loss: 0.5819112200319767
test: 0.5002 loss: 0.6941292966723442
epoch time: 12.68min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_22.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_22.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 24 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6304 loss: 0.602682014632225
train val: 0.6239 loss: 0.619433401453495
test: 0.4983 loss: 0.6963883791327476
epoch time: 12.50min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_23.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_23.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 25 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.64255 loss: 0.5708352890789509
train val: 0.6435 loss: 0.5746357249498367
test: 0.5103 loss: 0.6955260101079941
epoch time: 12.78min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_24.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_24.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 26 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6486 loss: 0.5668161426365376
train val: 0.6199 loss: 0.5714111444711685
test: 0.5002 loss: 0.6821892236232757
epoch time: 12.65min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_5.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_5.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 27 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.63385 loss: 0.5735134291231633
train val: 0.622 loss: 0.5694981135010719
test: 0.4979 loss: 0.68143065533638
epoch time: 12.66min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_26.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_26.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 28 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6234 loss: 0.576854264691472
train val: 0.612 loss: 0.5674688564717769
test: 0.5025 loss: 0.6799674209952354
epoch time: 12.51min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_27.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_27.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997
epoch 29 version: 4


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.60755 loss: 0.6077915422469378
train val: 0.4885 loss: 0.6944189430952072
test: 0.4872 loss: 0.7097502334594726
epoch time: 12.29min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v4_lg_28.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_28.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v4_lg_1.pkl
best train: 0.7213
best train val: 0.6823
best test: 0.5997


test val:   0%|          | 0/10000 [00:00<?, ?it/s]

test val: {'accuracy': 0.4824} loss: 0.711129313826561
correct


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
2,*valid--!copy;(sw);(st),She actually has a song where this is the poin...,"Yep, this one came to mind. She is incredible ...","Yep, this one came to mind. She is incredible ...",She actually has a song where this is the poin...,0.997527,0.002473,False,0.500002,0.499998,0,0
3,*valid--random;(mtl:20);(sw);(st),The cigarette industry social lied about cigar...,covid vaccines can protect from covid,“Women get sex so easy!” “Man I bet no one wan...,You guys watch porn with sound on??? What a pr...,0.880797,0.119203,False,0.504899,0.495101,0,0
0,random--valid;(mtl:20);(sw);(st),So that's where it came from albeit it's still...,Yeah that's pretty much what he did. Epstein h...,I work in a kitchen full of men and it's quite...,Tears are for the French,0.000911,0.999089,True,0.398905,0.601095,1,1
1,random--valid;(mtl:20);(sw);(st),I am uncomfortably comfortable not trying.,They're slightly better than nickleback. Haha,"US uses 120 VAC (RMS), so the wattage is half ...",Interesting!! I came into this not knowing muc...,0.119203,0.880797,True,0.499998,0.500002,1,1
2,*valid--random;(sw);(st),$5 and I'll never tell him i won as I wouldn't...,Sounds like a plot to a movie lol,"The last iteration of them yeah, just thanks t...",Don't throw away your money. When you get paid...,0.880797,0.119203,False,0.601102,0.398898,0,0


incorrect


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
0,!copy--valid;(mtl:20);(sw);(st),Gonna ride the baloney pony all the way to Was...,I'd literally take the law into my own hands.,I'd literally take the law into my own hands.,Gonna ride the baloney pony all the way to Was...,4.662937e-15,1.0,True,0.596427,0.403573,0,1
1,*valid--!copy;(mtl:20);(sw);(st),Men are still plenty selective about who they ...,"Men are selective, as in, they'll select from ...","Men are selective, as in, they'll select from ...",Men are still plenty selective about who they ...,0.9998766,0.000123,False,0.499994,0.500006,1,0
2,random--valid;(sw);(st),Dungeons and Dragons,Did you see this post a week ago and got jealo...,"Question for the men, what's the worst part of...",When sitting wrong on your bike seat and cutti...,0.1192029,0.880797,True,0.500008,0.499992,0,1
3,random--valid;(sw);(st),I always brush my teeth but need to floss more...,As an american all I can say is: why?,[removed],User profile checks out. Bet you're on a list ...,0.1192029,0.880797,True,0.601125,0.398875,0,1
0,*valid--random;(sw);(st),And they taste delicious when chopped into hal...,"In Spain/Portugal we also cook them ""a feira"" ...",Seinfeld seems to be the one that paved the wa...,Wiping down gym equipment.,0.9933071,0.006693,False,0.398877,0.601123,1,0


array([[ -5.5141153,   9.044989 , -31.054817 , ...,  -5.765789 ,
          1.6973658,  38.0201   ],
       [  2.4493523,  -1.2062029, -25.710094 , ..., -30.202929 ,
         28.304255 ,  29.005411 ],
       [  2.4443328,  -1.1933109, -25.796654 , ..., -30.265146 ,
         28.360348 ,  29.088915 ],
       [ -4.8776374,   8.160881 , -27.576668 , ...,  -4.6755104,
          0.6063639,  34.189426 ]], dtype=float32)

array([[  2.4481952,  -1.2033818, -25.72742  , ..., -30.214876 ,
         28.315321 ,  29.02198  ],
       [  2.4477866,  -1.2023114, -25.735184 , ..., -30.220766 ,
         28.320482 ,  29.029535 ],
       [  2.4462852,  -1.1985096, -25.760689 , ..., -30.23946  ,
         28.337133 ,  29.054201 ],
       [  2.4459023,  -1.1975831, -25.765926 , ..., -30.242685 ,
         28.340334 ,  29.059114 ]], dtype=float32)

array([[  2.445209  ,  -1.1956297 , -25.78091   , ..., -30.254072  ,
         28.350254  ,  29.073763  ],
       [  2.4468923 ,  -1.200091  , -25.74943   , ..., -30.230898  ,
         28.329674  ,  29.04323   ],
       [  2.4487844 ,  -1.2048225 , -25.718475  , ..., -30.208658  ,
         28.309593  ,  29.013414  ],
       [ -4.878682  ,   8.162063  , -27.581423  , ...,  -4.676206  ,
          0.60657036,  34.195087  ]], dtype=float32)

array([[ -4.8785563,   8.161921 , -27.58085  , ...,  -4.6761227,
          0.6065458,  34.1944   ],
       [  2.4408793,  -1.1840903, -25.859398 , ..., -30.309908 ,
         28.40057  ,  29.149448 ],
       [  2.4493713,  -1.2062863, -25.708912 , ..., -30.201809 ,
         28.303381 ,  29.004202 ],
       [ -5.5141273,   9.04501  , -31.0549   , ...,  -5.7658443,
          1.6974282,  38.020176 ]], dtype=float32)

array([[  2.4385178 ,  -1.1777536 , -25.901848  , ..., -30.340334  ,
         28.427639  ,  29.190416  ],
       [ -5.514207  ,   9.045146  , -31.055452  , ...,  -5.766234  ,
          1.6978741 ,  38.02068   ],
       [ -4.8784933 ,   8.161849  , -27.580564  , ...,  -4.67608   ,
          0.60653305,  34.194057  ],
       [ -4.87847   ,   8.161822  , -27.580456  , ...,  -4.6760645 ,
          0.60652864,  34.19393   ]], dtype=float32)

array([[ -4.87859   ,   8.161959  , -27.581005  , ...,  -4.6761446 ,
          0.60655254,  34.194588  ],
       [ -5.514116  ,   9.044991  , -31.054821  , ...,  -5.765791  ,
          1.6973675 ,  38.020107  ],
       [  2.4401698 ,  -1.1821436 , -25.873573  , ..., -30.321194  ,
         28.409954  ,  29.163486  ],
       [ -4.8775597 ,   8.160793  , -27.576313  , ...,  -4.675459  ,
          0.60634834,  34.189007  ]], dtype=float32)

array([[  2.45079   ,  -1.2098842 , -25.684277  , ..., -30.183773  ,
         28.287184  ,  28.980368  ],
       [  2.45063   ,  -1.2094291 , -25.688234  , ..., -30.186987  ,
         28.289919  ,  28.984282  ],
       [  2.4494162 ,  -1.2063949 , -25.708258  , ..., -30.201365  ,
         28.302963  ,  29.003578  ],
       [ -4.87862   ,   8.161992  , -27.58114   , ...,  -4.6761646 ,
          0.60655844,  34.194744  ]], dtype=float32)

array([[  2.4337804,  -1.1642884, -25.989208 , ..., -30.400358 ,
         28.482494 ,  29.274292 ],
       [ -4.8785667,   8.161932 , -27.580898 , ...,  -4.6761293,
          0.6065476,  34.194458 ],
       [ -5.5141487,   9.045046 , -31.05505  , ...,  -5.765951 ,
          1.6975509,  38.020317 ],
       [  2.4473264,  -1.2012572, -25.740698 , ..., -30.22417  ,
         28.323845 ,  29.034712 ]], dtype=float32)

array([[  2.443903  ,  -1.1920716 , -25.807037  , ..., -30.27395   ,
         28.36748   ,  29.099361  ],
       [ -4.8786235 ,   8.161997  , -27.58116   , ...,  -4.6761675 ,
          0.60655904,  34.194767  ],
       [  2.4425223 ,  -1.1884961 , -25.829363  , ..., -30.288313  ,
         28.381304  ,  29.120428  ],
       [  2.4463482 ,  -1.1986724 , -25.759533  , ..., -30.238575  ,
         28.336367  ,  29.053072  ]], dtype=float32)

array([[ -4.8785057,   8.161863 , -27.58062  , ...,  -4.6760883,
          0.6065355,  34.19413  ],
       [  2.447299 ,  -1.2010677, -25.743698 , ..., -30.227066 ,
         28.32607  ,  29.037786 ],
       [  2.4476943,  -1.2020344, -25.737751 , ..., -30.222965 ,
         28.322279 ,  29.032103 ],
       [ -4.8785753,   8.161942 , -27.580938 , ...,  -4.676135 ,
          0.606549 ,  34.194504 ]], dtype=float32)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- T

None 0
epoch 0 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5124 loss: 15.49084200267191
train val: 0.5607 loss: 0.692069203710556
test: 0.5275 loss: 0.7090290358424187
epoch time: 12.08min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_0.pkl
best train: 0.5124
best train val: 0.5607
best test: 0.5275
epoch 1 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5018 loss: 12.30151793807464
train val: 0.4277 loss: 0.6954786846637726
test: 0.4827 loss: 0.6938259245872498
epoch time: 12.24min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_0.pkl
best train: 0.5124
best train val: 0.5607
best test: 0.5275
epoch 2 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5012 loss: 10.787513082538497
train val: 0.5257 loss: 0.6936739743232727
test: 0.5372 loss: 0.6931018321752548
epoch time: 12.19min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_0.pkl
best train: 0.5124
best train val: 0.5607
best test: 0.5275
epoch 3 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50785 loss: 9.027155975110782
train val: 0.5515 loss: 0.6929506484270096
test: 0.5216 loss: 0.6930907460212707
epoch time: 12.14min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_0.pkl
best train: 0.5124
best train val: 0.5607
best test: 0.5275
epoch 4 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50085 loss: 8.070581946259194
train val: 0.4682 loss: 0.6934021499633789
test: 0.4415 loss: 0.6932738023519516
epoch time: 11.98min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_0.pkl
best train: 0.5124
best train val: 0.5607
best test: 0.5275
epoch 5 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.4967 loss: 6.743519980129495
train val: 0.4195 loss: 0.6932168829441071
test: 0.498 loss: 0.6931307865858078
epoch time: 12.16min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_0.pkl
best train: 0.5124
best train val: 0.5607
best test: 0.5275
epoch 6 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50905 loss: 5.460568293827223
train val: 0.4136 loss: 0.6932007447957993
test: 0.485 loss: 0.6931213898658752
epoch time: 12.09min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_5.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_5.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_0.pkl
best train: 0.5124
best train val: 0.5607
best test: 0.5275
epoch 7 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5024 loss: 4.644774866327901
train val: 0.5846 loss: 0.6923048345088959
test: 0.4642 loss: 0.7031250957489014
epoch time: 12.05min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_6.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_6.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_7.pkl
best train: 0.5024
best train val: 0.5846
best test: 0.4642
epoch 8 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50375 loss: 3.5469214188688856
train val: 0.5232 loss: 0.6931337061405182
test: 0.5298 loss: 0.6931248193740844
epoch time: 11.53min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_1.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_1.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_7.pkl
best train: 0.5024
best train val: 0.5846
best test: 0.4642
epoch 9 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5078 loss: 2.985410696429416
train val: 0.4967 loss: 0.6931561813354492
test: 0.4924 loss: 0.6931466828584671
epoch time: 12.16min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_4.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_4.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_7.pkl
best train: 0.5024
best train val: 0.5846
best test: 0.4642
epoch 10 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5047 loss: 2.445631775299646
train val: 0.5239 loss: 0.693143875670433
test: 0.5026 loss: 0.6931472195386886
epoch time: 12.03min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_9.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_9.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_7.pkl
best train: 0.5024
best train val: 0.5846
best test: 0.4642
epoch 11 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5005 loss: 2.0753793276003094
train val: 0.5198 loss: 0.6931455617666245
test: 0.4997 loss: 0.6931502801418304
epoch time: 12.11min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_8.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_8.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_7.pkl
best train: 0.5024
best train val: 0.5846
best test: 0.4642
epoch 12 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5041 loss: 1.8059638323711698
train val: 0.495 loss: 0.6931466546058654
test: 0.5116 loss: 0.6931426744937896
epoch time: 11.56min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_11.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_11.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_7.pkl
best train: 0.5024
best train val: 0.5846
best test: 0.4642
epoch 13 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50395 loss: 1.5710935600684024
train val: 0.5155 loss: 0.6931433555603027
test: 0.5128 loss: 0.6931441612243653
epoch time: 12.22min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_12.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_12.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_7.pkl
best train: 0.5024
best train val: 0.5846
best test: 0.4642
epoch 14 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50355 loss: 1.6887984605108388
train val: 0.6005 loss: 1.561040269088745
test: 0.5149 loss: 2.3436809191823005
epoch time: 11.82min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_13.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_13.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_14.pkl
best train: 0.50355
best train val: 0.6005
best test: 0.5149
epoch 15 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5322 loss: 2.538003856804408
train val: 0.6048 loss: 1.4093581914186477
test: 0.5037 loss: 2.0866986381530763
epoch time: 12.18min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_10.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_10.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_15.pkl
best train: 0.5322
best train val: 0.6048
best test: 0.5037
epoch 16 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.54 loss: 2.0165807614155113
train val: 0.4941 loss: 1.8071628792226315
test: 0.4837 loss: 2.2921057878255846
epoch time: 12.27min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_2.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_2.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_15.pkl
best train: 0.5322
best train val: 0.6048
best test: 0.5037
epoch 17 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.55615 loss: 0.982832669574488
train val: 0.6619 loss: 0.622569235156104
test: 0.5501 loss: 0.7459016922354699
epoch time: 12.23min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_16.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_16.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_17.pkl
best train: 0.55615
best train val: 0.6619
best test: 0.5501
epoch 18 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.67735 loss: 0.7766322918241378
train val: 0.67 loss: 1.5042771409680542
test: 0.6358 loss: 1.0148659125864505
epoch time: 12.02min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_3.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_3.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_18.pkl
best train: 0.67735
best train val: 0.67
best test: 0.6358
epoch 19 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.70295 loss: 0.9138207558692666
train val: 0.6774 loss: 0.5632765248656273
test: 0.5952 loss: 0.6863343617528677
epoch time: 12.27min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_0.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_0.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_19.pkl
best train: 0.70295
best train val: 0.6774
best test: 0.5952
epoch 20 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7412 loss: 0.5156928931663962
train val: 0.6819 loss: 0.5426402902441165
test: 0.6138 loss: 0.6615298364579678
epoch time: 12.44min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_7.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_7.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_20.pkl
best train: 0.7412
best train val: 0.6819
best test: 0.6138
epoch 21 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7634 loss: 0.4596094323984882
train val: 0.6849 loss: 0.5210247419701333
test: 0.6001 loss: 0.6599770497560501
epoch time: 12.02min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_14.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_14.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_21.pkl
best train: 0.7634
best train val: 0.6849
best test: 0.6001
epoch 22 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.77255 loss: 0.4329931208132293
train val: 0.6861 loss: 0.5478422407143662
test: 0.5815 loss: 0.6981615740478039
epoch time: 11.76min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_15.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_15.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_22.pkl
best train: 0.77255
best train val: 0.6861
best test: 0.5815
epoch 23 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.77825 loss: 0.41491101223200677
train val: 0.7007 loss: 0.5198177046968805
test: 0.6006 loss: 0.6663915285646915
epoch time: 11.88min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_17.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_17.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_23.pkl
best train: 0.77825
best train val: 0.7007
best test: 0.6006
epoch 24 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.79095 loss: 0.39369543007077007
train val: 0.6939 loss: 0.5270905346778294
test: 0.6213 loss: 0.6574454555630684
epoch time: 12.22min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_18.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_18.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_23.pkl
best train: 0.77825
best train val: 0.7007
best test: 0.6006
epoch 25 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7942 loss: 0.38079975544025585
train val: 0.7028 loss: 0.5169964360220823
test: 0.6106 loss: 0.6675813419759273
epoch time: 11.80min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_19.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_19.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_25.pkl
best train: 0.7942
best train val: 0.7028
best test: 0.6106
epoch 26 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.80735 loss: 0.4033535264191989
train val: 0.7014 loss: 0.5244288647991383
test: 0.5989 loss: 0.6822789943933487
epoch time: 11.92min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_20.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_20.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_25.pkl
best train: 0.7942
best train val: 0.7028
best test: 0.6106
epoch 27 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8041 loss: 0.36627331793508494
train val: 0.7102 loss: 0.5047814257414429
test: 0.5841 loss: 0.6739215078115464
epoch time: 12.25min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_21.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_21.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_27.pkl
best train: 0.8041
best train val: 0.7102
best test: 0.5841
epoch 28 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.81065 loss: 0.3531682250114143
train val: 0.697 loss: 0.5190192678402644
test: 0.5679 loss: 0.6865593840956687
epoch time: 11.96min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_22.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_22.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_27.pkl
best train: 0.8041
best train val: 0.7102
best test: 0.5841
epoch 29 version: 5


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8262 loss: 0.3322411826904223
train val: 0.7181 loss: 0.4973251761728883
test: 0.6045 loss: 0.6627839342415333
epoch time: 12.08min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v5_lg_24.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_24.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v5_lg_29.pkl
best train: 0.8262
best train val: 0.7181
best test: 0.6045


test val:   0%|          | 0/10000 [00:00<?, ?it/s]

test val: {'accuracy': 0.6147} loss: 0.6567791040837765
correct


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
2,random--valid;(sw);(st),Dungeons and Dragons,Did you see this post a week ago and got jealo...,"Question for the men, what's the worst part of...",When sitting wrong on your bike seat and cutti...,0.119203,0.8807971,True,0.306294,0.693706,1,1
3,random--valid;(sw);(st),I always brush my teeth but need to floss more...,As an american all I can say is: why?,[removed],User profile checks out. Bet you're on a list ...,0.119203,0.8807971,True,0.001176,0.998824,1,1
0,*valid--random;(sw);(st),And they taste delicious when chopped into hal...,"In Spain/Portugal we also cook them ""a feira"" ...",Seinfeld seems to be the one that paved the wa...,Wiping down gym equipment.,0.993307,0.006692851,False,0.507354,0.492646,0,0
1,*valid--!copy;(mtl:20);(sw);(st),Yeah that's a thing. I spend my summer vacatio...,you ever done on going maintenance on a boat? ...,you ever done on going maintenance on a boat? ...,Yeah that's a thing. I spend my summer vacatio...,1.0,1.5628820000000001e-18,False,0.706697,0.293303,0,0
2,*valid--!copy;(sw);(st),She actually has a song where this is the poin...,"Yep, this one came to mind. She is incredible ...","Yep, this one came to mind. She is incredible ...",She actually has a song where this is the poin...,0.997527,0.002472623,False,0.549429,0.450571,0,0


incorrect


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
0,!copy--valid;(mtl:20);(sw);(st),Gonna ride the baloney pony all the way to Was...,I'd literally take the law into my own hands.,I'd literally take the law into my own hands.,Gonna ride the baloney pony all the way to Was...,4.662937e-15,1.0,True,0.761206,0.238794,0,1
1,*valid--!copy;(mtl:20);(sw);(st),Men are still plenty selective about who they ...,"Men are selective, as in, they'll select from ...","Men are selective, as in, they'll select from ...",Men are still plenty selective about who they ...,0.9998766,0.000123,False,0.473718,0.526282,1,0
3,*valid--random;(mtl:20);(sw);(st),The cigarette industry social lied about cigar...,covid vaccines can protect from covid,“Women get sex so easy!” “Man I bet no one wan...,You guys watch porn with sound on??? What a pr...,0.8807971,0.119203,False,0.418727,0.581273,1,0
1,random--valid;(mtl:20);(sw);(st),I am uncomfortably comfortable not trying.,They're slightly better than nickleback. Haha,"US uses 120 VAC (RMS), so the wattage is half ...",Interesting!! I came into this not knowing muc...,0.1192029,0.880797,True,0.626153,0.373847,0,1
2,*valid--random;(mtl:20);(sw);(st),“Repetitive discography” but you don’t even kn...,Are you not comprehending that *I'm* not the o...,Best kept as a fantasy.,*chumbawumba pumping in the background* ROUND 2,0.8807971,0.119203,False,0.458894,0.541106,1,0


array([[-0.67384326,  0.22092262,  0.75438   , ...,  0.3191897 ,
         0.7139604 ,  0.3768081 ],
       [-0.67370796,  0.22007982,  0.7529957 , ...,  0.3193534 ,
         0.7133509 ,  0.37701836],
       [-0.67408174,  0.222236  ,  0.75668603, ...,  0.31889346,
         0.714904  ,  0.37651086],
       [-0.6730024 ,  0.21514376,  0.7455417 , ...,  0.32014063,
         0.70964795,  0.37829036]], dtype=float32)

array([[-0.6729191 ,  0.21454546,  0.7446769 , ...,  0.32022795,
         0.7091843 ,  0.3784364 ],
       [-0.6741481 ,  0.222558  ,  0.75729245, ...,  0.31880838,
         0.71513486,  0.3764475 ],
       [-0.67383265,  0.2208571 ,  0.75427085, ...,  0.3192036 ,
         0.7139136 ,  0.3768245 ],
       [-0.6744545 ,  0.2237597 ,  0.75982285, ...,  0.3184105 ,
         0.716009  ,  0.37628248]], dtype=float32)

array([[-0.67386717,  0.22105782,  0.7546103 , ...,  0.3191638 ,
         0.7140591 ,  0.37677643],
       [-0.67464674,  0.22427867,  0.761165  , ...,  0.31816024,
         0.7163996 ,  0.3762866 ],
       [-0.6749009 ,  0.2247216 ,  0.76265913, ...,  0.31783524,
         0.71674854,  0.37640813],
       [-0.673444  ,  0.21830137,  0.7502163 , ...,  0.31965902,
         0.7120424 ,  0.37748066]], dtype=float32)

array([[-0.6745047 ,  0.22391213,  0.76019156, ...,  0.31834483,
         0.7161223 ,  0.3762758 ],
       [-0.67414314,  0.22253384,  0.7572463 , ...,  0.3188153 ,
         0.71511793,  0.376452  ],
       [-0.6737957 ,  0.22063446,  0.7539    , ...,  0.31924745,
         0.7137522 ,  0.37687877],
       [-0.6734011 ,  0.2179999 ,  0.74975896, ...,  0.3197076 ,
         0.71181846,  0.37756002]], dtype=float32)

array([[-0.6742808 ,  0.22313668,  0.75844544, ...,  0.31863704,
         0.7155525 ,  0.37634966],
       [-0.67510325,  0.22490834,  0.7636408 , ...,  0.3175854 ,
         0.71691215,  0.37658542],
       [-0.67411304,  0.22239088,  0.75697494, ...,  0.31885314,
         0.7150147 ,  0.3764797 ],
       [-0.67417824,  0.22269681,  0.7575607 , ...,  0.31876975,
         0.7152345 ,  0.37642172]], dtype=float32)

array([[-0.67510784,  0.22491129,  0.763662  , ...,  0.31757972,
         0.71691513,  0.3765904 ],
       [-0.6738604 ,  0.22102083,  0.7545465 , ...,  0.3191708 ,
         0.714032  ,  0.3767852 ],
       [-0.673881  ,  0.221157  ,  0.75476813, ...,  0.3191367 ,
         0.71414   ,  0.37674978],
       [-0.674677  ,  0.22434537,  0.7613592 , ...,  0.318121  ,
         0.71645   ,  0.37629417]], dtype=float32)

array([[-0.67441475,  0.22362918,  0.7595182 , ...,  0.31846306,
         0.71591276,  0.37629178],
       [-0.673668  ,  0.21981995,  0.7525795 , ...,  0.3194011 ,
         0.7131614 ,  0.3770851 ],
       [-0.6740287 ,  0.22196387,  0.75618887, ...,  0.31896105,
         0.71470904,  0.3765681 ],
       [-0.6750383 ,  0.22486238,  0.76334405, ...,  0.3176647 ,
         0.71686953,  0.37652162]], dtype=float32)

array([[-0.6732099 ,  0.2166364 ,  0.7477241 , ...,  0.31991896,
         0.71079147,  0.3779142 ],
       [-0.6748046 ,  0.22458383,  0.76212966, ...,  0.31795698,
         0.71663666,  0.37634742],
       [-0.67531186,  0.22497937,  0.76449424, ...,  0.31733596,
         0.7169971 ,  0.37682965],
       [-0.67329514,  0.21725026,  0.7486338 , ...,  0.3198251 ,
         0.71125567,  0.37775525]], dtype=float32)

array([[-0.6739897 ,  0.21697494,  0.75073165, ...,  0.32043427,
         0.7100134 ,  0.37912762],
       [-0.674153  ,  0.22258134,  0.7573373 , ...,  0.3188018 ,
         0.7151516 ,  0.376443  ],
       [-0.6732067 ,  0.21660897,  0.747684  , ...,  0.3199235 ,
         0.7107715 ,  0.37792155],
       [-0.67413694,  0.22250585,  0.7571926 , ...,  0.31882277,
         0.7150984 ,  0.3764574 ]], dtype=float32)

array([[-0.6753178 ,  0.22497961,  0.7645168 , ...,  0.3173287 ,
         0.71699816,  0.37683785],
       [-0.67442805,  0.22367427,  0.7596222 , ...,  0.31844515,
         0.7159459 ,  0.3762883 ],
       [-0.67329156,  0.21722272,  0.74859285, ...,  0.31982937,
         0.71123475,  0.37776247],
       [-0.67334026,  0.21756923,  0.7491105 , ...,  0.31977555,
         0.7114957 ,  0.37767228]], dtype=float32)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- T

None 0
epoch 0 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7332 loss: 0.48818620220576414
train val: 0.7525 loss: 0.46517444884608267
test: 0.6504 loss: 0.6184796117722988
epoch time: 12.03min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 1 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7872 loss: 0.4212688070033881
train val: 0.6824 loss: 0.5854752740688622
test: 0.6086 loss: 0.6712882932186127
epoch time: 12.46min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 2 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.79115 loss: 0.40997804764311296
train val: 0.7405 loss: 0.4737699679868296
test: 0.637 loss: 0.6282015978753567
epoch time: 12.50min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 3 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.80485 loss: 0.3846466132761416
train val: 0.7319 loss: 0.49165933673612017
test: 0.6467 loss: 0.6205471907138824
epoch time: 11.74min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 4 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.80535 loss: 0.38252966411075723
train val: 0.7363 loss: 0.47933327532885595
test: 0.6149 loss: 0.6526708350300788
epoch time: 12.16min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 5 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8461 loss: 0.31605073712317533
train val: 0.737 loss: 0.5071698420344269
test: 0.6174 loss: 0.6666951045572758
epoch time: 11.94min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 6 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8442 loss: 0.3163593923499542
train val: 0.7235 loss: 0.5250659038059413
test: 0.5844 loss: 0.733663065969944
epoch time: 12.84min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_1.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_1.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 7 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.84055 loss: 0.31488618020583325
train val: 0.7263 loss: 0.5009109037939459
test: 0.5817 loss: 0.7140583528876304
epoch time: 12.60min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_6.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_6.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 8 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83025 loss: 0.33325075525976683
train val: 0.6934 loss: 0.5208458706812001
test: 0.5786 loss: 0.6733221934378147
epoch time: 12.27min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_7.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_7.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 9 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83535 loss: 0.3218698040960615
train val: 0.6907 loss: 0.5211916905514896
test: 0.5877 loss: 0.6706608513474465
epoch time: 12.51min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_8.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_8.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 10 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.80315 loss: 0.36219286434601855
train val: 0.7117 loss: 0.5031748682703823
test: 0.6008 loss: 0.6669673808336258
epoch time: 12.36min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_9.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_9.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 11 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.80875 loss: 0.35575528357946207
train val: 0.711 loss: 0.5080701637345628
test: 0.6083 loss: 0.6616555043578148
epoch time: 12.32min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_10.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_10.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 12 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.811 loss: 0.35205347564942713
train val: 0.7156 loss: 0.5058628937192261
test: 0.6014 loss: 0.6623292964696884
epoch time: 12.46min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_11.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_11.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 13 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.80725 loss: 0.35842145580022955
train val: 0.7189 loss: 0.4941935920235701
test: 0.5846 loss: 0.6750245284557342
epoch time: 12.19min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_12.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_12.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 14 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8056 loss: 0.3561828934922232
train val: 0.7203 loss: 0.5061897926099002
test: 0.5974 loss: 0.6750096005201339
epoch time: 12.45min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_13.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_13.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 15 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.80745 loss: 0.35577637498407105
train val: 0.723 loss: 0.4898808456759434
test: 0.6059 loss: 0.6643300914943219
epoch time: 12.25min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_14.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_14.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 16 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7991 loss: 0.4074606903744255
train val: 0.7125 loss: 0.5326093631253767
test: 0.6047 loss: 0.6897414577394724
epoch time: 12.28min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_15.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_15.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 17 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.79255 loss: 0.3845917424747602
train val: 0.7156 loss: 0.4903769145688508
test: 0.6173 loss: 0.6474854667544365
epoch time: 12.33min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_16.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_16.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 18 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.66695 loss: 13.690719044582185
train val: 0.6569 loss: 0.6899208131551743
test: 0.503 loss: 0.6937750320911408
epoch time: 12.11min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_17.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_17.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 19 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.49865 loss: 46.71370835829582
train val: 0.5758 loss: 0.6914377968549729
test: 0.4341 loss: 0.6937788139104843
epoch time: 12.49min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_18.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_18.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 20 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50025 loss: 48.57904476507552
train val: 0.4201 loss: 0.6937505393981933
test: 0.4585 loss: 0.6933913734436035
epoch time: 12.59min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_19.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_19.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 21 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.4983 loss: 49.235184776296556
train val: 0.3785 loss: 0.6946483438491822
test: 0.488 loss: 0.693354755783081
epoch time: 12.31min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_20.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_20.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 22 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.4993 loss: 49.47641962960302
train val: 0.5745 loss: 0.6925731330871582
test: 0.5094 loss: 0.6930759880065918
epoch time: 12.22min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_21.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_21.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 23 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5027 loss: 49.22736178179609
train val: 0.3527 loss: 0.737173505282402
test: 0.5379 loss: 0.6895982605934143
epoch time: 12.30min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_22.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_22.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 24 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.4991 loss: 49.660955799971745
train val: 0.6426 loss: 0.6855821759223938
test: 0.5055 loss: 0.6929944447278976
epoch time: 12.45min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_23.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_23.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 25 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.5029 loss: 49.38376763835287
train val: 0.4874 loss: 0.6858832339048385
test: 0.5195 loss: 0.7000978005170823
epoch time: 12.08min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_24.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_24.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 26 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50505 loss: 49.238888828129376
train val: 0.4887 loss: 0.693315909576416
test: 0.5021 loss: 0.6931404533386231
epoch time: 12.51min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_25.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_25.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 27 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50145 loss: 49.619895359927376
train val: 0.4792 loss: 0.6936970970153808
test: 0.4987 loss: 0.6931588249206543
epoch time: 12.17min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_26.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_26.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 28 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.49725 loss: 50.0638295347793
train val: 0.5137 loss: 0.6929742240905762
test: 0.5157 loss: 0.6928926872253418
epoch time: 11.83min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_27.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_27.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504
epoch 29 version: 6


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.50135 loss: 49.69540810501074
train val: 0.5323 loss: 2.0886081813686292
test: 0.4731 loss: 2.1254481077454517
epoch time: 11.94min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v6_lg_28.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_28.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v6_lg_0.pkl
best train: 0.7332
best train val: 0.7525
best test: 0.6504


test val:   0%|          | 0/10000 [00:00<?, ?it/s]

test val: {'accuracy': 0.4849} loss: 2.1199638451452136
correct


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
1,*valid--!copy;(mtl:20);(sw);(st),Men are still plenty selective about who they ...,"Men are selective, as in, they'll select from ...","Men are selective, as in, they'll select from ...",Men are still plenty selective about who they ...,0.999877,0.000123,False,0.942507,0.057493,0,0
2,random--valid;(sw);(st),Dungeons and Dragons,Did you see this post a week ago and got jealo...,"Question for the men, what's the worst part of...",When sitting wrong on your bike seat and cutti...,0.119203,0.880797,True,0.468791,0.531209,1,1
3,random--valid;(sw);(st),I always brush my teeth but need to floss more...,As an american all I can say is: why?,[removed],User profile checks out. Bet you're on a list ...,0.119203,0.880797,True,0.488283,0.511717,1,1
2,*valid--!copy;(sw);(st),She actually has a song where this is the poin...,"Yep, this one came to mind. She is incredible ...","Yep, this one came to mind. She is incredible ...",She actually has a song where this is the poin...,0.997527,0.002473,False,0.5,0.5,0,0
1,random--valid;(mtl:20);(sw);(st),I am uncomfortably comfortable not trying.,They're slightly better than nickleback. Haha,"US uses 120 VAC (RMS), so the wattage is half ...",Interesting!! I came into this not knowing muc...,0.119203,0.880797,True,0.496094,0.503906,1,1


incorrect


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
0,!copy--valid;(mtl:20);(sw);(st),Gonna ride the baloney pony all the way to Was...,I'd literally take the law into my own hands.,I'd literally take the law into my own hands.,Gonna ride the baloney pony all the way to Was...,4.662937e-15,1.0,True,0.600188,0.399812,0,1
0,*valid--random;(sw);(st),And they taste delicious when chopped into hal...,"In Spain/Portugal we also cook them ""a feira"" ...",Seinfeld seems to be the one that paved the wa...,Wiping down gym equipment.,0.9933071,0.006692851,False,0.000869,0.999131,1,0
1,*valid--!copy;(mtl:20);(sw);(st),Yeah that's a thing. I spend my summer vacatio...,you ever done on going maintenance on a boat? ...,you ever done on going maintenance on a boat? ...,Yeah that's a thing. I spend my summer vacatio...,1.0,1.5628820000000001e-18,False,0.000883,0.999117,1,0
3,*valid--random;(mtl:20);(sw);(st),The cigarette industry social lied about cigar...,covid vaccines can protect from covid,“Women get sex so easy!” “Man I bet no one wan...,You guys watch porn with sound on??? What a pr...,0.8807971,0.1192029,False,0.001525,0.998475,1,0
0,random--valid;(mtl:20);(sw);(st),So that's where it came from albeit it's still...,Yeah that's pretty much what he did. Epstein h...,I work in a kitchen full of men and it's quite...,Tears are for the French,0.0009110512,0.9990889,True,0.5,0.5,0,1


array([[-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055]], dtype=float32)

array([[-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055]], dtype=float32)

array([[-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055]], dtype=float32)

array([[-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055]], dtype=float32)

array([[-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055]], dtype=float32)

array([[-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055]], dtype=float32)

array([[-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055]], dtype=float32)

array([[-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055]], dtype=float32)

array([[-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055]], dtype=float32)

array([[-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055],
       [-0.13402689,  0.11265951, -0.11515143, ...,  0.12815818,
         0.13821185, -0.12555055]], dtype=float32)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight']
- T

None 0
epoch 0 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.62185 loss: 1.7745394072067335
train val: 0.6013 loss: 1.4023633702451364
test: 0.5507 loss: 1.70880985946632
epoch time: 11.83min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_0.pkl
best train: 0.62185
best train val: 0.6013
best test: 0.5507
epoch 1 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.6031 loss: 0.7781518956616521
train val: 0.6634 loss: 0.5839086244288832
test: 0.5655 loss: 0.72518733895123
epoch time: 11.48min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_1.pkl
best train: 0.6031
best train val: 0.6634
best test: 0.5655
epoch 2 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7256 loss: 0.5131121148750186
train val: 0.6772 loss: 0.5553551696198061
test: 0.583 loss: 0.7066839819133282
epoch time: 12.08min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_2.pkl
best train: 0.7256
best train val: 0.6772
best test: 0.583
epoch 3 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.7613 loss: 0.44680245517046424
train val: 0.6899 loss: 0.5536354661806254
test: 0.6096 loss: 0.6820989182412625
epoch time: 12.01min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_3.pkl
best train: 0.7613
best train val: 0.6899
best test: 0.6096
epoch 4 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.77095 loss: 0.423101530269075
train val: 0.6971 loss: 0.5101440171981231
test: 0.5887 loss: 0.6672375190675258
epoch time: 11.98min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_4.pkl
best train: 0.77095
best train val: 0.6971
best test: 0.5887
epoch 5 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8168 loss: 0.35408319746564687
train val: 0.694 loss: 0.5179621286929352
test: 0.5941 loss: 0.6617787913322448
epoch time: 12.10min
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_4.pkl
best train: 0.77095
best train val: 0.6971
best test: 0.5887
epoch 6 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.78075 loss: 0.5136843757559474
train val: 0.683 loss: 0.5614753312140703
test: 0.5657 loss: 0.7074917588174343
epoch time: 11.98min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_0.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_0.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_4.pkl
best train: 0.77095
best train val: 0.6971
best test: 0.5887
epoch 7 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.82295 loss: 0.3390327628902047
train val: 0.6934 loss: 0.5313079586483538
test: 0.5928 loss: 0.695860969042778
epoch time: 11.65min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_1.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_1.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_4.pkl
best train: 0.77095
best train val: 0.6971
best test: 0.5887
epoch 8 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83655 loss: 0.4002296531963692
train val: 0.7084 loss: 0.509014545772253
test: 0.5744 loss: 0.6748736840426922
epoch time: 11.62min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_2.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_2.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_8.pkl
best train: 0.83655
best train val: 0.7084
best test: 0.5744
epoch 9 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.84595 loss: 0.31049162761586013
train val: 0.7025 loss: 0.549667366528511
test: 0.5759 loss: 0.7576123836874962
epoch time: 11.90min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_6.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_6.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_8.pkl
best train: 0.83655
best train val: 0.7084
best test: 0.5744
epoch 10 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8362 loss: 0.31990451351657356
train val: 0.7122 loss: 0.5171322136909701
test: 0.5792 loss: 0.7035541096687317
epoch time: 11.43min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_3.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_3.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_10.pkl
best train: 0.8362
best train val: 0.7122
best test: 0.5792
epoch 11 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83255 loss: 0.3202941599355918
train val: 0.7148 loss: 0.5086763628699293
test: 0.5951 loss: 0.6802368289977312
epoch time: 12.24min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_7.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_7.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_11.pkl
best train: 0.83255
best train val: 0.7148
best test: 0.5951
epoch 12 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83815 loss: 0.314087191529307
train val: 0.7176 loss: 0.514269862486422
test: 0.5933 loss: 0.6659267780900001
epoch time: 12.04min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_5.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_5.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_12.pkl
best train: 0.83815
best train val: 0.7176
best test: 0.5933
epoch 13 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83095 loss: 0.34650854276302345
train val: 0.7063 loss: 0.4937385989313945
test: 0.5661 loss: 0.6882416447460651
epoch time: 12.40min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_4.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_4.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_12.pkl
best train: 0.83815
best train val: 0.7176
best test: 0.5933
epoch 14 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83685 loss: 0.31117436063498627
train val: 0.727 loss: 0.4852307893070392
test: 0.6052 loss: 0.667114349937439
epoch time: 11.81min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_9.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_9.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_14.pkl
best train: 0.83685
best train val: 0.727
best test: 0.6052
epoch 15 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.84245 loss: 0.30641611365351273
train val: 0.7352 loss: 0.49112230277342317
test: 0.6098 loss: 0.6781145765304566
epoch time: 12.12min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_13.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_13.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_15.pkl
best train: 0.84245
best train val: 0.7352
best test: 0.6098
epoch 16 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8378 loss: 0.3415661134566633
train val: 0.7313 loss: 0.5192235174039712
test: 0.6034 loss: 0.7444085005253553
epoch time: 12.11min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_8.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_8.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_15.pkl
best train: 0.84245
best train val: 0.7352
best test: 0.6098
epoch 17 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.83975 loss: 0.3104730922760644
train val: 0.7308 loss: 0.48431492136353627
test: 0.6178 loss: 0.66953666639328
epoch time: 11.77min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_10.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_10.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_15.pkl
best train: 0.84245
best train val: 0.7352
best test: 0.6098
epoch 18 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8488 loss: 0.29460772819906267
train val: 0.7423 loss: 0.47122300749302376
test: 0.6354 loss: 0.6348423278808594
epoch time: 11.80min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_11.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_11.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_18.pkl
best train: 0.8488
best train val: 0.7423
best test: 0.6354
epoch 19 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8383 loss: 0.32709947251068733
train val: 0.7434 loss: 0.48898517724201085
test: 0.6047 loss: 0.6973569317281246
epoch time: 11.57min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_12.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_12.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_19.pkl
best train: 0.8383
best train val: 0.7434
best test: 0.6047
epoch 20 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8459 loss: 0.2964082527023929
train val: 0.7472 loss: 0.4788614019801851
test: 0.6214 loss: 0.6695426309078932
epoch time: 12.09min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_14.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_14.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_20.pkl
best train: 0.8459
best train val: 0.7472
best test: 0.6214
epoch 21 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.85825 loss: 0.2876457078683141
train val: 0.7483 loss: 0.4705887165047607
test: 0.6055 loss: 0.6826833169519901
epoch time: 11.25min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_17.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_17.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_21.pkl
best train: 0.85825
best train val: 0.7483
best test: 0.6055
epoch 22 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8545 loss: 0.28514605140814986
train val: 0.7453 loss: 0.4778335502685921
test: 0.5978 loss: 0.7125196521520615
epoch time: 11.98min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_16.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_16.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_21.pkl
best train: 0.85825
best train val: 0.7483
best test: 0.6055
epoch 23 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.85235 loss: 0.2948363781483185
train val: 0.751 loss: 0.4559442458899692
test: 0.6012 loss: 0.6644754538655281
epoch time: 11.97min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_15.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_15.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_23.pkl
best train: 0.85235
best train val: 0.751
best test: 0.6012
epoch 24 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.86165 loss: 0.27824685959739454
train val: 0.7346 loss: 1.2730958347572865
test: 0.6076 loss: 0.6576379602789879
epoch time: 11.89min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_18.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_18.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_23.pkl
best train: 0.85235
best train val: 0.751
best test: 0.6012
epoch 25 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8593 loss: 0.2858693088912776
train val: 0.7472 loss: 0.4897236751932767
test: 0.6072 loss: 0.7150841431081295
epoch time: 11.63min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_24.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_24.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_23.pkl
best train: 0.85235
best train val: 0.751
best test: 0.6012
epoch 26 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.85935 loss: 0.28399239301964835
train val: 0.7389 loss: 0.4651710458979942
test: 0.5772 loss: 0.6954235296189785
epoch time: 11.89min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_19.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_19.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_23.pkl
best train: 0.85235
best train val: 0.751
best test: 0.6012
epoch 27 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.85925 loss: 0.28355703282083555
train val: 0.7526 loss: 0.45705522775987995
test: 0.5894 loss: 0.693178917351365
epoch time: 11.78min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_26.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_26.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_27.pkl
best train: 0.85925
best train val: 0.7526
best test: 0.5894
epoch 28 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.8616 loss: 0.27298160836456836
train val: 0.75 loss: 0.45901810668881043
test: 0.6057 loss: 0.6738091388583183
epoch time: 12.07min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_22.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_22.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_27.pkl
best train: 0.85925
best train val: 0.7526
best test: 0.5894
epoch 29 version: 7


train:   0%|          | 0/20000 [00:00<?, ?it/s]

train val:   0%|          | 0/10000 [00:00<?, ?it/s]

test:   0%|          | 0/10000 [00:00<?, ?it/s]

train: 0.86905 loss: 0.2690407262679932
train val: 0.759 loss: 0.4763432358721413
test: 0.611 loss: 0.6874653227001428
epoch time: 11.98min
removing /mnt/d/workspace/clotho/notebooks/checkpoints/stats_v7_lg_25.json
removing /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_25.pkl
best model: /mnt/d/workspace/clotho/notebooks/checkpoints/harness_v7_lg_29.pkl
best train: 0.86905
best train val: 0.759
best test: 0.611


test val:   0%|          | 0/10000 [00:00<?, ?it/s]

test val: {'accuracy': 0.6173} loss: 0.6819494714677334
correct


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
1,*valid--!copy;(mtl:20);(sw);(st),Men are still plenty selective about who they ...,"Men are selective, as in, they'll select from ...","Men are selective, as in, they'll select from ...",Men are still plenty selective about who they ...,0.999877,0.0001233946,False,0.578769,0.421231,0,0
2,random--valid;(sw);(st),Dungeons and Dragons,Did you see this post a week ago and got jealo...,"Question for the men, what's the worst part of...",When sitting wrong on your bike seat and cutti...,0.119203,0.8807971,True,0.191766,0.808234,1,1
3,random--valid;(sw);(st),I always brush my teeth but need to floss more...,As an american all I can say is: why?,[removed],User profile checks out. Bet you're on a list ...,0.119203,0.8807971,True,0.000179,0.999821,1,1
1,*valid--!copy;(mtl:20);(sw);(st),Yeah that's a thing. I spend my summer vacatio...,you ever done on going maintenance on a boat? ...,you ever done on going maintenance on a boat? ...,Yeah that's a thing. I spend my summer vacatio...,1.0,1.5628820000000001e-18,False,0.784672,0.215328,0,0
2,*valid--!copy;(sw);(st),She actually has a song where this is the poin...,"Yep, this one came to mind. She is incredible ...","Yep, this one came to mind. She is incredible ...",She actually has a song where this is the poin...,0.997527,0.002472623,False,0.994568,0.005432,0,0


incorrect


Unnamed: 0,gen_name,parent_left,child_left,parent_right,child_right,sway_left,sway_right,correct_is_right,logit_left,logit_right,preds,truth
0,!copy--valid;(mtl:20);(sw);(st),Gonna ride the baloney pony all the way to Was...,I'd literally take the law into my own hands.,I'd literally take the law into my own hands.,Gonna ride the baloney pony all the way to Was...,4.662937e-15,1.0,True,0.635587,0.364413,0,1
0,*valid--random;(sw);(st),And they taste delicious when chopped into hal...,"In Spain/Portugal we also cook them ""a feira"" ...",Seinfeld seems to be the one that paved the wa...,Wiping down gym equipment.,0.9933071,0.006693,False,0.46819,0.53181,1,0
1,random--valid;(mtl:20);(sw);(st),I am uncomfortably comfortable not trying.,They're slightly better than nickleback. Haha,"US uses 120 VAC (RMS), so the wattage is half ...",Interesting!! I came into this not knowing muc...,0.1192029,0.880797,True,0.707786,0.292214,0,1
1,!copy--valid;(mtl:20);(sw);(st),I remember learning in 2nd grade that pizza wa...,The food pyramid -brought to you by big grain,The food pyramid -brought to you by big grain,I remember learning in 2nd grade that pizza wa...,0.0,1.0,True,0.581855,0.418146,0,1
0,random--valid;(mtl:20);(sw);(st),Day-bird aaaAAAAaaaa Fighter of the Night-bird...,Happy endings. Very underrated,You just described my relationship Lol,"That doesn't sound healthy, it's not supposed ...",0.01798621,0.982014,True,0.556109,0.443891,0,1


array([[-0.7014988 ,  0.558522  ,  0.47718802, ...,  0.6170604 ,
         0.2213868 , -0.21728808],
       [-0.701512  ,  0.55946547,  0.47733477, ...,  0.61641407,
         0.22057512, -0.21764515],
       [-0.70152026,  0.56015974,  0.4774379 , ...,  0.6159454 ,
         0.21998498, -0.217913  ],
       [-0.7015317 ,  0.56164986,  0.4776472 , ...,  0.6149612 ,
         0.21873413, -0.21849404]], dtype=float32)

array([[-0.70152587,  0.56072503,  0.47751904, ...,  0.61556876,
         0.2195084 , -0.21813285],
       [-0.7015125 ,  0.5595083 ,  0.4773413 , ...,  0.616385  ,
         0.22053847, -0.21766162],
       [-0.7015233 ,  0.5604583 ,  0.47748107, ...,  0.615746  ,
         0.2197328 , -0.21802902],
       [-0.7014934 ,  0.5581708 ,  0.4771313 , ...,  0.61730343,
         0.22169217, -0.21715793]], dtype=float32)

array([[-0.7015011 ,  0.5586797 ,  0.47721317, ...,  0.6169514 ,
         0.22125024, -0.21734709],
       [-0.7014876 ,  0.55780137,  0.47707024, ...,  0.6175607 ,
         0.22201538, -0.217023  ],
       [-0.7015094 ,  0.55926555,  0.4773044 , ...,  0.6165499 ,
         0.220746  , -0.21756876],
       [-0.7014947 ,  0.5582557 ,  0.47714502, ...,  0.6172448 ,
         0.22161825, -0.2171892 ]], dtype=float32)

array([[-0.70149744,  0.55844015,  0.47717485, ...,  0.6171169 ,
         0.22145776, -0.21725756],
       [-0.70150363,  0.55886126,  0.47724175, ...,  0.6168268 ,
         0.22109339, -0.2174154 ],
       [-0.7015221 ,  0.56033874,  0.47746387, ...,  0.6158258 ,
         0.21983369, -0.21798249],
       [-0.7015023 ,  0.55876666,  0.47722682, ...,  0.6168918 ,
         0.22117516, -0.21737969]], dtype=float32)

array([[-0.70150644,  0.55905694,  0.47727212, ...,  0.6166928 ,
         0.22092512, -0.21748938],
       [-0.70148534,  0.55765635,  0.4770458 , ...,  0.6176617 ,
         0.22214273, -0.2169707 ],
       [-0.70149946,  0.55857396,  0.47719628, ...,  0.61702466,
         0.22134188, -0.21730743],
       [-0.7015099 ,  0.5592961 ,  0.47730902, ...,  0.6165292 ,
         0.22071981, -0.21758041]], dtype=float32)

array([[-0.7014963 ,  0.5583602 ,  0.47716212, ...,  0.6171723 ,
         0.22152714, -0.21722798],
       [-0.70152104,  0.5602039 ,  0.47744462, ...,  0.6159156 ,
         0.21994747, -0.21793029],
       [-0.7015076 ,  0.5591319 ,  0.47728375, ...,  0.6166413 ,
         0.22086078, -0.21751785],
       [-0.7014914 ,  0.55803305,  0.47710872, ...,  0.61739945,
         0.2218126 , -0.21710734]], dtype=float32)

array([[-0.70150536,  0.5589707 ,  0.47725892, ...,  0.6167516 ,
         0.22099914, -0.21745671],
       [-0.7015103 ,  0.559334  ,  0.47731477, ...,  0.61650366,
         0.2206876 , -0.2175948 ],
       [-0.70150584,  0.5590114 ,  0.47726515, ...,  0.61672384,
         0.22096418, -0.21747212],
       [-0.7014894 ,  0.5578963 ,  0.47708607, ...,  0.6174944 ,
         0.22193211, -0.21705748]], dtype=float32)

array([[-0.70151764,  0.55991906,  0.4774026 , ...,  0.6161072 ,
         0.22018886, -0.21781982],
       [-0.7014901 ,  0.55796295,  0.477097  , ...,  0.6174479 ,
         0.22187367, -0.21708177],
       [-0.7014944 ,  0.55822754,  0.47714046, ...,  0.61726433,
         0.22164273, -0.21717884],
       [-0.70152205,  0.5603171 ,  0.4774607 , ...,  0.61584026,
         0.21985196, -0.2179741 ]], dtype=float32)

array([[-0.70150536,  0.55894566,  0.47725493, ...,  0.616769  ,
         0.22102089, -0.21744728],
       [-0.70149225,  0.5580835 ,  0.47711673, ...,  0.61736476,
         0.22176851, -0.21712583],
       [-0.7015084 ,  0.5592023 ,  0.47729468, ...,  0.6165936 ,
         0.22080043, -0.21754456],
       [-0.70150876,  0.5592128 ,  0.4772961 , ...,  0.61658645,
         0.22079137, -0.2175485 ]], dtype=float32)

array([[-0.7014938 ,  0.5581999 ,  0.47713593, ...,  0.6172834 ,
         0.2216669 , -0.21716861],
       [-0.70149606,  0.5583425 ,  0.47715905, ...,  0.61718446,
         0.22154258, -0.2172214 ],
       [-0.70150423,  0.558891  ,  0.47724634, ...,  0.6168063 ,
         0.22106788, -0.21742655],
       [-0.70150715,  0.55909896,  0.4772787 , ...,  0.61666375,
         0.22088885, -0.21750543]], dtype=float32)