## Imports

In [None]:
!pip install torch transformers scikit-learn



In [None]:
import os
import random
import time
from datetime import datetime
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import IterableDataset, DataLoader
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence
from huggingface_hub import login
from transformers import AutoTokenizer
from collections import defaultdict
from types import SimpleNamespace
import sys

## arguments

In [None]:
# Run configuration. A SimpleNamespace stands in for parsed command-line
# arguments so the rest of the notebook can read settings as attributes.
args = SimpleNamespace(**{
    # Locations (rebased onto Google Drive below when running on Colab).
    "data_dir": "data",
    "save_dir": "trained_models",
    # Placeholder token; replaced from Colab's secret store when available.
    "hf_token": "XXXXX",
    "ckpt": None,
    # Optimisation settings.
    "batch_size": 64,
    "epochs": 20,
    "lr": 1e-4,
    "seed": 24601,
    "num_workers": 0,
    # Category file stems: pos_cat is labelled 1, neg_cat is labelled 0.
    "pos_cat": "encyclopedia",
    "neg_cat": "holmes",
    "print_freq": 100,
    # Model selection and LSTM hyperparameters.
    "model_type": "lstm",
    "lstm_hidden_dim": 300,
    "lstm_num_layers": 3,
})

running_on_colab = 'google.colab' in sys.modules
if running_on_colab:
    print("On Colab")
    from google.colab import drive, userdata
    drive.mount('/content/drive')
    args.hf_token = userdata.get('HF_TOKEN')
    # Both directories live under the same project folder on the mounted drive.
    drive_project_root = "/content/drive/My Drive/modular-fudge/"
    args.data_dir = drive_project_root + args.data_dir
    args.save_dir = drive_project_root + args.save_dir

# Seed every RNG used in this notebook with the same value.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

On Colab
Mounted at /content/drive


<torch._C.Generator at 0x7ec9aefca150>

## constants.py

In [None]:
# --- Tokenizer ---
# Hugging Face model id whose tokenizer is loaded by the data pipeline.
TOKENIZER_NAME = 'meta-llama/Llama-3.2-3B-Instruct'
# Pad token string registered with the tokenizer when it has none.
PAD_TOKEN = '[PAD]'

# --- Data Processing ---
# Validation examples in total; half are taken from each category file.
VAL_SIZE = 400
# NOTE(review): not referenced in the visible code -- presumably the
# truncation length for the elided "line truncation logic"; confirm.
MAX_LEN = 200
# Sentences that tokenize to fewer tokens than this are skipped.
MIN_SENTENCE_LENGTH = 3

# --- Training ---
# Prefer the GPU when one is visible to torch, otherwise run on CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

## util.py

In [None]:
def save_checkpoint(state, save_path):
    """Serialize ``state`` to ``save_path`` with ``torch.save``.

    The parent directory of ``save_path`` is created first when the path
    has one.

    Args:
        state: Object to serialize.
        save_path: Destination file path.
    """
    parent_dir = os.path.dirname(save_path)
    # A bare filename has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    torch.save(state, save_path)

In [None]:
def num_params(model):
    """Return the number of scalar parameters in ``model`` that require grad."""
    total = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad=False) are not counted.
        if param.requires_grad:
            total += param.numel()
    return total

In [None]:
def pad_mask(lengths: torch.LongTensor) -> torch.BoolTensor:
    """Build a (batch, max_len) mask marking non-padding positions.

    Args:
        lengths: 1-D tensor holding the unpadded length of each sequence.

    Returns:
        A bool tensor on the same device as ``lengths`` where entry
        ``[b, t]`` is True when ``t < lengths[b]`` (a real token) and False
        for padding. ``max_len`` is ``lengths.max()``. An empty ``lengths``
        yields an empty ``(0, 0)`` mask instead of raising.
    """
    if lengths.numel() == 0:
        # torch.max on an empty tensor raises; there is nothing to mask.
        return torch.zeros((0, 0), dtype=torch.bool, device=lengths.device)

    max_seqlen = int(lengths.max())
    # Create the position index directly on the target device.
    positions = torch.arange(max_seqlen, device=lengths.device)
    # (1, max_seqlen) < (batch_size, 1) broadcasts to (batch_size, max_seqlen).
    return positions.unsqueeze(0) < lengths.unsqueeze(1)

In [None]:
class ProgressMeter(object):
    """Prints one tab-separated progress line per call to ``display``.

    A line consists of the prefix plus a ``[batch/total]`` counter, the
    current wall-clock time, and the string form of every meter.
    """

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the progress line for batch index ``batch``."""
        counter = self.prefix + self.batch_fmtstr.format(batch)
        timestamp = time.ctime(time.time())
        readings = [str(meter) for meter in self.meters]
        print('\t'.join([counter, timestamp, *readings]))

    def _get_batch_fmtstr(self, num_batches):
        """Return a template like ``'[{:2d}/32]'`` padded to the total's width."""
        width = len(str(num_batches // 1))
        slot = f'{{:{width}d}}'
        return f'[{slot}/{slot.format(num_batches)}]'

In [None]:
class AverageMeter(object):
    """Tracks the latest value and the weighted running average of a metric.

    ``fmt`` is a format-spec suffix (for example ``':6.4f'``) applied to
    both the latest value and the average when the meter is printed.
    """

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        """Clear the latest value, average, weighted sum and count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed with weight ``n`` and refresh the average."""
        self.val = val
        self.sum += val * n
        self.count += n
        if self.count <= 0:
            # Nothing counted yet: leave the average as it was.
            return
        self.avg = self.sum / self.count

    def __str__(self):
        template = '{{name}} {{val{0}}} ({{avg{0}}})'.format(self.fmt)
        return template.format(**vars(self))

## data.py

In [None]:
def collate(batch):
    """Pad a list of examples into batch tensors.

    Args:
        batch: List of ``(tokens, length, pad_id, label)`` tuples, where
            ``tokens`` is a 1-D tensor of token ids. The ``pad_id`` element
            is not used here; padding always uses id 0.

    Returns:
        A tuple ``(inputs, lengths, classification_labels)``:
            inputs: ``(batch_size, max_length)`` tensor of token ids,
                right-padded with 0 up to the longest length in the batch.
            lengths: ``LongTensor`` of the per-example lengths.
            classification_labels: ``LongTensor`` of the per-example labels.
    """
    sequences = [item[0] for item in batch]
    lengths = torch.LongTensor([item[1] for item in batch])
    max_length = int(lengths.max())

    padded = []
    for seq in sequences:
        shortfall = max_length - len(seq)
        if shortfall > 0:
            # new_zeros keeps the padding on the same dtype/device as the tokens.
            seq = torch.cat([seq, seq.new_zeros(shortfall)], dim=0)
        padded.append(seq)
    inputs = torch.stack(padded, dim=0)

    classification_labels = torch.LongTensor([item[3] for item in batch])

    return (inputs, lengths, classification_labels)

In [None]:
class Dataset:
    """Loads the two text categories and exposes train/val/test loaders.

    Each split is a list of ``(sentence, label)`` pairs; sentences from
    ``args.pos_cat`` are labelled 1 and those from ``args.neg_cat`` 0.
    Files are read from ``<data_dir>/splits/{train,test}/<category>.txt``.
    """

    def __init__(self, args):
        print('Loading data...')
        self.data_dir = args.data_dir
        self.tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

        # Register a pad token when the tokenizer ships without one.
        if self.tokenizer.pad_token is None:
            self.tokenizer.add_special_tokens({'pad_token': PAD_TOKEN})
        self.tokenizer_pad_id = self.tokenizer.pad_token_id

        labelled_categories = ((args.pos_cat, 1), (args.neg_cat, 0))
        splits_root = os.path.join(args.data_dir, 'splits')
        train, val, test = [], [], []

        # --- Train & val: both come from the 'train' directory ---
        for category, label in labelled_categories:
            train_file_path = os.path.join(splits_root, 'train', f'{category}.txt')
            if not os.path.exists(train_file_path):
                print(f"Warning: Train file not found at {train_file_path}. Skipping.")
                continue

            with open(train_file_path, 'r', encoding='utf-8') as handle:
                for line_no, raw_line in enumerate(handle):
                    # The first VAL_SIZE // 2 lines of each file go to validation.
                    bucket = val if line_no < VAL_SIZE // 2 else train
                    bucket.append((raw_line.strip(), label))

        # --- Test: read from the 'test' directory ---
        for category, label in labelled_categories:
            test_file_path = os.path.join(splits_root, 'test', f'{category}.txt')
            if not os.path.exists(test_file_path):
                print(f"Warning: Test file not found at {test_file_path}. Skipping.")
                continue

            with open(test_file_path, 'r', encoding='utf-8') as handle:
                test.extend((raw_line.strip(), label) for raw_line in handle)

        self.splits = {'train': train, 'val': val, 'test': test}
        print('Done loading data. Split sizes:')
        for split_name, examples in self.splits.items():
            print(f"{split_name}: {len(examples)}")

    def shuffle(self, split, seed=None):
        """Shuffle ``split`` in place, reseeding the global ``random`` if given a seed."""
        if seed is not None:
            random.seed(seed)
        random.shuffle(self.splits[split])

    def loader(self, split, batch_size, num_workers=0, indices=None):
        """Return a DataLoader over ``split``, optionally restricted to ``indices``."""
        examples = self.splits[split]
        if indices is not None:
            examples = [examples[i] for i in indices]
        return DataLoader(
            SplitLoader(examples, self),
            batch_size=batch_size,
            pin_memory=True,
            collate_fn=collate,
            num_workers=num_workers,
        )

In [None]:
class SplitLoader(IterableDataset):
    """Iterates one split, tokenizing each sentence on demand.

    Args:
        data: List of ``(raw_sentence, label)`` pairs.
        parent: Object providing ``tokenizer`` and ``tokenizer_pad_id``.

    Yields ``(tokens, length, pad_id, label)`` tuples. Sentences that
    tokenize to fewer than ``MIN_SENTENCE_LENGTH`` tokens are skipped.
    The position counter lives on the instance, so this is written for
    ``num_workers=0``.
    """

    def __init__(self, data, parent):
        super().__init__()
        self.data = data
        self.pos = 0
        self.parent = parent

    def __len__(self):
        # Counts every raw item, including ones later skipped as too short.
        return len(self.data)

    def __iter__(self):
        self.pos = 0  # Reset for new epoch
        return self

    def __next__(self):
        # Loop (rather than recurse) past short sentences so that a long run
        # of skipped items cannot exhaust the interpreter's recursion limit.
        while self.pos < len(self.data):
            raw_sentence, classification_label = self.data[self.pos]
            self.pos += 1

            sentence_tokens = self.parent.tokenizer.encode(raw_sentence, return_tensors='pt')[0]
            length = len(sentence_tokens)
            if length < MIN_SENTENCE_LENGTH:
                continue

            # (input_tokens, length, pad_id, label) -- the layout collate expects.
            return (sentence_tokens, length, self.parent.tokenizer_pad_id, classification_label)

        raise StopIteration

# Models

## LSTM Model

In [None]:
r"""
========================================================================
Model Definition File Contract
========================================================================

This file defines a classifier model architecture that is compatible with
the project's main training (`main_train.py`) and evaluation (`evaluate.py`)
scripts.

To add a new model (e.g., "MyNewModel"), create a new file like this one
(e.g., `models\my_new_model.py`) and implement the following components:

1.  A class that inherits from `torch.nn.Module`.
2.  An `__init__` method with a specific signature.
3.  An *internal* `forward` method for the model's logic.
4.  A `get_scores_for_batch` "adapter" method for training.
5.  A `get_final_scores` "adapter" method for evaluation.

The factory in `models\__init__.py` must also be updated to import
and select this new class based on the `--model_type` argument.

------------------------------------------------------------------------
CONTRACT DETAILS
------------------------------------------------------------------------

--- [1. `__init__` Method] ---

The `__init__` method *must* have the following signature:

def __init__(self, args, vocab_size):
    ...

    - `args`: The fully populated `ArgumentParser` namespace. This
      object will contain all command-line arguments, allowing the
      model to pull its own specific hyperparameters (e.g.,
      `args.my_model_hidden_dim`, `args.my_model_num_layers`).

    - `vocab_size`: An integer (e.g., from `len(tokenizer)`)
      specifying the total vocabulary size. This is required to
      correctly initialize the `nn.Embedding` layer.

--- [2. `forward` Method] ---

The `forward` method is *internal* to your model. Its signature can
be whatever you need.

    - Example: `def forward(self, inputs, lengths):` (for LSTM)
    - Example: `def forward(self, inputs):` (for Mamba/Transformer)

This method will contain the core architectural logic (embeddings,
RNN/Mamba/Transformer layers, output head).

It *must* be causal (unidirectional) and output a tensor of
per-token logits.

    - **Output Shape:** `(batch_size, seq_len)`

--- [3. `get_scores_for_batch` Method] ---

This is the adapter method called by `main_train.py`. It is
responsible for unpacking the batch, calling its own `forward`
method, and returning *all* per-token scores for the loss
calculation.

    - **Input:** `batch` (The raw, collated batch from the DataLoader.
      Typically `[inputs, lengths, classification_targets]`)

    - **Returns:** A tuple of `(scores, targets)`
        - `scores`: `torch.Tensor` of shape `(batch_size, seq_len)`
          (The per-token logits from the `forward` pass).
        - `targets`: `torch.Tensor` of shape `(batch_size,)`
          (The true class labels, e.g., [0, 1, 1, 0]).

--- [4. `get_final_scores` Method] ---

This is the adapter method called by `evaluate.py`. It is
responsible for unpacking the batch, calling `forward`, and
returning the logit from *only* the single, final, unpadded token.

    - **Input:** `batch` (The raw, collated batch, same as above).

    - **Returns:** `last_logits`
        - `last_logits`: `torch.Tensor` of shape `(batch_size,)`
          (The logit from the last *real* token for each item
          in the batch).
"""
# The string above is a raw literal because it contains Windows-style paths
# (`models\my_new_model.py`, `models\__init__.py`); in a normal string
# `\m` and `\_` are invalid escape sequences and trigger a SyntaxWarning.
def comment_only():
    """No-op: takes no arguments and returns None."""
    return

  (e.g., `models\my_new_model.py`) and implement the following components:


In [None]:
# --- Model Architecture ---
class LSTMClassifier(nn.Module):
    """Unidirectional LSTM that emits one classification logit per token.

    Token ids are embedded, run through a multi-layer LSTM over packed
    sequences, and projected to a single logit at every position.
    """

    def __init__(self, args, vocab_size):
        """
        Initializes the LSTM model.

        Args:
            args: Namespace providing `args.lstm_hidden_dim` (used for both
                  the embedding size and the LSTM hidden size) and
                  `args.lstm_num_layers`.
            vocab_size: Number of rows in the embedding table; every token
                  id fed to the model must be smaller than this.
        """
        super().__init__()

        # No `padding_idx` is set: the collate function pads with id 0, but
        # 0 is not the tokenizer's pad id (the dataset uses
        # `tokenizer.pad_token_id`), so declaring it as padding would pin a
        # real token's embedding to zero and block its gradient. Padded
        # positions never reach the LSTM or the loss because `forward`
        # packs the batch by `lengths`.
        self.embed = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=args.lstm_hidden_dim,
        )

        self.rnn = nn.LSTM(
            args.lstm_hidden_dim,
            args.lstm_hidden_dim,
            num_layers=args.lstm_num_layers,
            bidirectional=False,
            dropout=0.5,
            batch_first=True  # inputs/outputs are (batch, seq, feature)
        )
        self.out_linear = nn.Linear(args.lstm_hidden_dim, 1)

    def forward(self, inputs, lengths):
        """
        Internal forward pass for the LSTM.

        Args:
            inputs: (batch_size, seq_len) tensor of token ids.
            lengths: (batch_size,) tensor of unpadded sequence lengths.

        Returns:
            (batch_size, max(lengths)) tensor of per-token logits.
        """
        # (batch_size, seq_len, hidden_dim)
        embedded_inputs = self.embed(inputs)

        # Pack so the LSTM only processes real (unpadded) positions.
        packed_inputs = pack_padded_sequence(
            embedded_inputs,
            lengths.cpu(),  # pack_padded_sequence requires CPU lengths
            batch_first=True,
            enforce_sorted=False
        )

        rnn_output, _ = self.rnn(packed_inputs)

        # Unpack back to a padded (batch_size, max(lengths), hidden_dim) tensor.
        rnn_output, _ = pad_packed_sequence(
            rnn_output,
            batch_first=True
        )

        # (batch_size, max(lengths), 1) -> (batch_size, max(lengths))
        return self.out_linear(rnn_output).squeeze(2)

    # ---
    # --- Adapter Methods (The "Contract") ---
    # ---

    def get_scores_for_batch(self, batch):
        """
        Adapter for training.

        Args:
            batch: `(inputs, lengths, classification_targets)`.

        Returns:
            `(scores, classification_targets)` where `scores` holds the
            per-token logits from `forward`. The targets are returned as
            received (they are not moved to the model's device here).
        """
        inputs, lengths, classification_targets = batch

        # Move tensors to the model's device
        device = self.embed.weight.device
        inputs = inputs.to(device)
        lengths = lengths.to(device)

        scores = self.forward(inputs, lengths)

        return scores, classification_targets

    def get_final_scores(self, batch):
        """
        Adapter for evaluation.

        Args:
            batch: `(inputs, lengths, _)`; the third element is ignored.

        Returns:
            (batch_size,) tensor holding, for each sequence, the logit at
            its last unpadded position (index `lengths - 1`).
        """
        inputs, lengths, _ = batch

        # Move tensors to the model's device
        device = self.embed.weight.device
        inputs = inputs.to(device)
        lengths = lengths.to(device)

        # scores shape: (batch_size, seq_len)
        scores = self.forward(inputs, lengths)

        # Index of the last real token per sequence, shape (batch_size,)
        last_indices = (lengths - 1).long()

        # (batch_size, 1) -> (batch_size,)
        last_logits = scores.gather(
            1, last_indices.unsqueeze(1)
        ).squeeze(1)

        return last_logits

## model init

In [None]:
def get_model(args, tokenizer_pad_id):
    """
    Build the model selected by `args.model_type`.

    Only 'lstm' is supported; any other value raises ValueError.

    NOTE(review): `tokenizer_pad_id` is forwarded as the `vocab_size`
    argument of `LSTMClassifier`, so the embedding table gets exactly
    `tokenizer_pad_id` rows. That is only large enough when every token id
    fed to the model is below the pad id -- confirm this is intended.
    """
    if args.model_type != 'lstm':
        raise ValueError(f"Unknown model type: {args.model_type}")
    return LSTMClassifier(args, tokenizer_pad_id)

## main_train.py

In [None]:
def train(model, dataset, optimizer, criterion, epoch, args):
    """Run one training epoch over the 'train' split.

    The split is reshuffled with seed ``epoch + args.seed``. Every unpadded
    token position is trained to predict its sentence's label, so the
    per-token logits are compared against the label broadcast along the
    sequence. The running loss is printed every ``args.print_freq``
    batches and once more at the end.
    """
    model.train()
    dataset.shuffle('train', seed=epoch + args.seed)

    loader = dataset.loader('train', args.batch_size, num_workers=args.num_workers)
    loss_meter = AverageMeter('loss', ':6.4f')
    total_length = len(loader)
    progress = ProgressMeter(total_length, [loss_meter], prefix='Training: ')

    batches = tqdm(loader, total=total_length)
    for batch_num, (inputs, lengths, classification_targets) in enumerate(batches):
        # Everything must be on the training device before the forward pass.
        inputs = inputs.to(args.device)
        lengths = lengths.to(args.device)
        classification_targets = classification_targets.to(args.device)

        # Per-token logits, (batch_size, seq_len)
        scores = model(inputs, lengths)

        # Sentence label repeated at every position, (batch_size, seq_len)
        token_labels = classification_targets.unsqueeze(1).expand(-1, scores.shape[1]).float()

        # True for real tokens, False for padding, (batch_size, seq_len)
        real_tokens = pad_mask(lengths)

        # Boolean indexing keeps only unpadded positions, in row-major order.
        loss = criterion(scores[real_tokens], token_labels[real_tokens])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_meter.update(loss.item(), inputs.size(0))
        if batch_num % args.print_freq == 0:
            progress.display(batch_num)

    progress.display(total_length)

In [None]:
def validate(model, dataset, criterion, args):
    """Compute the average per-token loss on the 'val' split.

    Uses the same loss as training: each unpadded token position is scored
    against its sentence's label. Runs in eval mode without gradients.

    Returns:
        The running average held by the loss meter after the last batch.
    """
    model.eval()
    loader = dataset.loader('val', args.batch_size, num_workers=args.num_workers)
    loss_meter = AverageMeter('loss', ':6.4f')
    total_length = len(loader)
    progress = ProgressMeter(total_length, [loss_meter], prefix='Validation: ')

    with torch.no_grad():
        batches = tqdm(loader, total=total_length)
        for batch_num, (inputs, lengths, classification_targets) in enumerate(batches):
            inputs = inputs.to(args.device)
            lengths = lengths.to(args.device)
            classification_targets = classification_targets.to(args.device)

            # Per-token logits, (batch_size, seq_len)
            scores = model(inputs, lengths)

            # Sentence label repeated at every position, then padding removed.
            token_labels = classification_targets.unsqueeze(1).expand(-1, scores.shape[1]).float()
            real_tokens = pad_mask(lengths)
            scores_unpadded = scores[real_tokens]
            labels_unpadded = token_labels[real_tokens]

            # Skip the batch when no unpadded positions remain.
            if scores_unpadded.nelement() > 0:
                loss = criterion(scores_unpadded, labels_unpadded)
                loss_meter.update(loss.item(), inputs.size(0))

    progress.display(total_length)
    return loss_meter.avg

In [None]:
def main(args):
    """Train the selected model and keep the checkpoint with the best val loss.

    Logs in to the Hugging Face hub, loads the dataset, builds the model and
    an Adam optimizer, then alternates training and validation for
    ``args.epochs`` epochs. Whenever the validation loss improves, a
    checkpoint is written to
    ``<args.save_dir>/<model_type>_<start timestamp>.pth.tar`` (the same
    file is overwritten on each improvement).

    Side effects on ``args``: sets ``args.task`` and ``args.device``.
    """
    import copy  # local import: only needed to copy args for the checkpoint

    login(token=args.hf_token)

    # Hard-code the task
    args.task = 'transfer'
    args.device = torch.device(DEVICE)

    dataset = Dataset(args)
    os.makedirs(args.save_dir, exist_ok=True)

    # NOTE(review): the pad id is what the model receives as its vocabulary
    # size (see get_model / LSTMClassifier.__init__) -- confirm this is
    # intended rather than len(dataset.tokenizer).
    model = get_model(args, dataset.tokenizer_pad_id)
    model = model.to(args.device)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    best_val_metric = 1e8  # Lower is better for BCE

    print('Model Parameters:', num_params(model))
    criterion = nn.BCEWithLogitsLoss().to(args.device)

    # One timestamp per run so every "best" save targets the same file.
    date_string = datetime.now().strftime("%Y%m%d_%H%M%S")
    checkpoint_path = os.path.join(args.save_dir, f'{args.model_type}_{date_string}.pth.tar')

    for epoch in range(args.epochs):
        print(f"--- TRAINING: Epoch {epoch} at {time.ctime()} ---")
        train(model, dataset, optimizer, criterion, epoch, args)

        print(f"--- VALIDATION: Epoch {epoch} at {time.ctime()} ---")
        metric = validate(model, dataset, criterion, args)

        if metric < best_val_metric:
            print(f'New best val metric: {metric:.4f}')
            best_val_metric = metric

            # Never persist the Hugging Face token: checkpoints get shared.
            # The attribute is kept (as None) so readers of ckpt['args']
            # still find it.
            saved_args = copy.copy(args)
            saved_args.hf_token = None

            save_checkpoint({
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'best_metric': best_val_metric,
                'optimizer': optimizer.state_dict(),
                'args': saved_args
            }, checkpoint_path)

In [None]:
main(args)

Loading data...
Done loading data. Split sizes:
train: 2000
val: 400
test: 600
Model Parameters: 40644301
--- TRAINING: Epoch 0 at Sun Nov  9 22:26:27 2025 ---


  6%|▋         | 2/32 [00:01<00:21,  1.39it/s]

Training: [ 0/32]	Sun Nov  9 22:26:28 2025	loss 0.6914 (0.6914)


100%|██████████| 32/32 [00:07<00:00,  4.33it/s]


Training: [32/32]	Sun Nov  9 22:26:34 2025	loss 0.6520 (0.6818)
--- VALIDATION: Epoch 0 at Sun Nov  9 22:26:34 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.79it/s]


Validation: [7/7]	Sun Nov  9 22:26:35 2025	loss 0.7349 (0.6387)
New best val metric: 0.6387
--- TRAINING: Epoch 1 at Sun Nov  9 22:26:36 2025 ---


  3%|▎         | 1/32 [00:00<00:06,  5.12it/s]

Training: [ 0/32]	Sun Nov  9 22:26:37 2025	loss 0.6392 (0.6392)


100%|██████████| 32/32 [00:06<00:00,  5.26it/s]


Training: [32/32]	Sun Nov  9 22:26:42 2025	loss 0.1347 (0.3921)
--- VALIDATION: Epoch 1 at Sun Nov  9 22:26:42 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.67it/s]


Validation: [7/7]	Sun Nov  9 22:26:43 2025	loss 0.0659 (0.1186)
New best val metric: 0.1186
--- TRAINING: Epoch 2 at Sun Nov  9 22:26:45 2025 ---


  3%|▎         | 1/32 [00:00<00:06,  4.77it/s]

Training: [ 0/32]	Sun Nov  9 22:26:45 2025	loss 0.1222 (0.1222)


100%|██████████| 32/32 [00:06<00:00,  5.22it/s]


Training: [32/32]	Sun Nov  9 22:26:51 2025	loss 0.0529 (0.1363)
--- VALIDATION: Epoch 2 at Sun Nov  9 22:26:51 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.72it/s]


Validation: [7/7]	Sun Nov  9 22:26:52 2025	loss 0.0744 (0.1088)
New best val metric: 0.1088
--- TRAINING: Epoch 3 at Sun Nov  9 22:26:53 2025 ---


  3%|▎         | 1/32 [00:00<00:06,  5.07it/s]

Training: [ 0/32]	Sun Nov  9 22:26:53 2025	loss 0.0977 (0.0977)


100%|██████████| 32/32 [00:06<00:00,  5.18it/s]


Training: [32/32]	Sun Nov  9 22:26:59 2025	loss 0.1110 (0.0949)
--- VALIDATION: Epoch 3 at Sun Nov  9 22:26:59 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.74it/s]


Validation: [7/7]	Sun Nov  9 22:27:00 2025	loss 0.0374 (0.0752)
New best val metric: 0.0752
--- TRAINING: Epoch 4 at Sun Nov  9 22:27:01 2025 ---


  6%|▋         | 2/32 [00:00<00:05,  5.01it/s]

Training: [ 0/32]	Sun Nov  9 22:27:01 2025	loss 0.0556 (0.0556)


100%|██████████| 32/32 [00:06<00:00,  5.16it/s]


Training: [32/32]	Sun Nov  9 22:27:07 2025	loss 0.2308 (0.0911)
--- VALIDATION: Epoch 4 at Sun Nov  9 22:27:07 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.59it/s]


Validation: [7/7]	Sun Nov  9 22:27:08 2025	loss 0.0467 (0.1114)
--- TRAINING: Epoch 5 at Sun Nov  9 22:27:08 2025 ---


  3%|▎         | 1/32 [00:00<00:05,  5.21it/s]

Training: [ 0/32]	Sun Nov  9 22:27:08 2025	loss 0.1186 (0.1186)


100%|██████████| 32/32 [00:06<00:00,  4.83it/s]


Training: [32/32]	Sun Nov  9 22:27:15 2025	loss 0.0715 (0.1078)
--- VALIDATION: Epoch 5 at Sun Nov  9 22:27:15 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.16it/s]


Validation: [7/7]	Sun Nov  9 22:27:16 2025	loss 0.0374 (0.0842)
--- TRAINING: Epoch 6 at Sun Nov  9 22:27:16 2025 ---


  3%|▎         | 1/32 [00:00<00:05,  5.36it/s]

Training: [ 0/32]	Sun Nov  9 22:27:16 2025	loss 0.0740 (0.0740)


100%|██████████| 32/32 [00:06<00:00,  5.26it/s]


Training: [32/32]	Sun Nov  9 22:27:22 2025	loss 0.0361 (0.0744)
--- VALIDATION: Epoch 6 at Sun Nov  9 22:27:22 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.68it/s]


Validation: [7/7]	Sun Nov  9 22:27:23 2025	loss 0.0375 (0.0753)
--- TRAINING: Epoch 7 at Sun Nov  9 22:27:23 2025 ---


  3%|▎         | 1/32 [00:00<00:05,  5.27it/s]

Training: [ 0/32]	Sun Nov  9 22:27:23 2025	loss 0.0629 (0.0629)


100%|██████████| 32/32 [00:06<00:00,  5.25it/s]


Training: [32/32]	Sun Nov  9 22:27:29 2025	loss 0.0320 (0.0567)
--- VALIDATION: Epoch 7 at Sun Nov  9 22:27:29 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.59it/s]


Validation: [7/7]	Sun Nov  9 22:27:30 2025	loss 0.0256 (0.0893)
--- TRAINING: Epoch 8 at Sun Nov  9 22:27:30 2025 ---


  3%|▎         | 1/32 [00:00<00:06,  5.06it/s]

Training: [ 0/32]	Sun Nov  9 22:27:30 2025	loss 0.0273 (0.0273)


100%|██████████| 32/32 [00:06<00:00,  5.25it/s]


Training: [32/32]	Sun Nov  9 22:27:36 2025	loss 0.0335 (0.0520)
--- VALIDATION: Epoch 8 at Sun Nov  9 22:27:36 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.73it/s]


Validation: [7/7]	Sun Nov  9 22:27:37 2025	loss 0.0315 (0.0964)
--- TRAINING: Epoch 9 at Sun Nov  9 22:27:37 2025 ---


  3%|▎         | 1/32 [00:00<00:05,  5.27it/s]

Training: [ 0/32]	Sun Nov  9 22:27:37 2025	loss 0.0814 (0.0814)


100%|██████████| 32/32 [00:06<00:00,  5.29it/s]


Training: [32/32]	Sun Nov  9 22:27:43 2025	loss 0.0393 (0.0483)
--- VALIDATION: Epoch 9 at Sun Nov  9 22:27:43 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.67it/s]


Validation: [7/7]	Sun Nov  9 22:27:44 2025	loss 0.0709 (0.1279)
--- TRAINING: Epoch 10 at Sun Nov  9 22:27:44 2025 ---


  3%|▎         | 1/32 [00:00<00:06,  5.17it/s]

Training: [ 0/32]	Sun Nov  9 22:27:44 2025	loss 0.1337 (0.1337)


100%|██████████| 32/32 [00:06<00:00,  5.30it/s]


Training: [32/32]	Sun Nov  9 22:27:50 2025	loss 0.0228 (0.0719)
--- VALIDATION: Epoch 10 at Sun Nov  9 22:27:50 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.71it/s]


Validation: [7/7]	Sun Nov  9 22:27:51 2025	loss 0.0263 (0.1173)
--- TRAINING: Epoch 11 at Sun Nov  9 22:27:51 2025 ---


  6%|▋         | 2/32 [00:00<00:05,  5.08it/s]

Training: [ 0/32]	Sun Nov  9 22:27:51 2025	loss 0.0424 (0.0424)


100%|██████████| 32/32 [00:06<00:00,  5.22it/s]


Training: [32/32]	Sun Nov  9 22:27:57 2025	loss 0.0507 (0.0433)
--- VALIDATION: Epoch 11 at Sun Nov  9 22:27:57 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.79it/s]


Validation: [7/7]	Sun Nov  9 22:27:58 2025	loss 0.0241 (0.0605)
New best val metric: 0.0605
--- TRAINING: Epoch 12 at Sun Nov  9 22:27:59 2025 ---


  0%|          | 0/32 [00:00<?, ?it/s]

Training: [ 0/32]	Sun Nov  9 22:27:59 2025	loss 0.0372 (0.0372)

  6%|▋         | 2/32 [00:00<00:05,  5.06it/s]




100%|██████████| 32/32 [00:06<00:00,  5.22it/s]


Training: [32/32]	Sun Nov  9 22:28:05 2025	loss 0.0249 (0.0427)
--- VALIDATION: Epoch 12 at Sun Nov  9 22:28:05 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.70it/s]


Validation: [7/7]	Sun Nov  9 22:28:06 2025	loss 0.0264 (0.0559)
New best val metric: 0.0559
--- TRAINING: Epoch 13 at Sun Nov  9 22:28:07 2025 ---


  3%|▎         | 1/32 [00:00<00:06,  4.74it/s]

Training: [ 0/32]	Sun Nov  9 22:28:07 2025	loss 0.0256 (0.0256)


100%|██████████| 32/32 [00:06<00:00,  5.22it/s]


Training: [32/32]	Sun Nov  9 22:28:13 2025	loss 0.0252 (0.0785)
--- VALIDATION: Epoch 13 at Sun Nov  9 22:28:13 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.72it/s]


Validation: [7/7]	Sun Nov  9 22:28:14 2025	loss 0.0141 (0.1095)
--- TRAINING: Epoch 14 at Sun Nov  9 22:28:14 2025 ---


  3%|▎         | 1/32 [00:00<00:05,  5.37it/s]

Training: [ 0/32]	Sun Nov  9 22:28:14 2025	loss 0.0431 (0.0431)


100%|██████████| 32/32 [00:06<00:00,  5.23it/s]


Training: [32/32]	Sun Nov  9 22:28:20 2025	loss 0.0275 (0.0502)
--- VALIDATION: Epoch 14 at Sun Nov  9 22:28:20 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.74it/s]


Validation: [7/7]	Sun Nov  9 22:28:21 2025	loss 0.0199 (0.0564)
--- TRAINING: Epoch 15 at Sun Nov  9 22:28:21 2025 ---


  3%|▎         | 1/32 [00:00<00:06,  5.00it/s]

Training: [ 0/32]	Sun Nov  9 22:28:21 2025	loss 0.0582 (0.0582)


100%|██████████| 32/32 [00:06<00:00,  5.20it/s]


Training: [32/32]	Sun Nov  9 22:28:27 2025	loss 0.0205 (0.0385)
--- VALIDATION: Epoch 15 at Sun Nov  9 22:28:27 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.76it/s]


Validation: [7/7]	Sun Nov  9 22:28:28 2025	loss 0.0191 (0.0953)
--- TRAINING: Epoch 16 at Sun Nov  9 22:28:28 2025 ---


  3%|▎         | 1/32 [00:00<00:05,  5.33it/s]

Training: [ 0/32]	Sun Nov  9 22:28:29 2025	loss 0.0369 (0.0369)


100%|██████████| 32/32 [00:06<00:00,  5.18it/s]


Training: [32/32]	Sun Nov  9 22:28:35 2025	loss 0.0196 (0.0359)
--- VALIDATION: Epoch 16 at Sun Nov  9 22:28:35 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.67it/s]


Validation: [7/7]	Sun Nov  9 22:28:35 2025	loss 0.0217 (0.0603)
--- TRAINING: Epoch 17 at Sun Nov  9 22:28:35 2025 ---


  3%|▎         | 1/32 [00:00<00:06,  5.00it/s]

Training: [ 0/32]	Sun Nov  9 22:28:36 2025	loss 0.0241 (0.0241)


100%|██████████| 32/32 [00:06<00:00,  5.24it/s]


Training: [32/32]	Sun Nov  9 22:28:42 2025	loss 0.0192 (0.0320)
--- VALIDATION: Epoch 17 at Sun Nov  9 22:28:42 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.64it/s]


Validation: [7/7]	Sun Nov  9 22:28:42 2025	loss 0.0181 (0.0955)
--- TRAINING: Epoch 18 at Sun Nov  9 22:28:42 2025 ---


  3%|▎         | 1/32 [00:00<00:05,  5.36it/s]

Training: [ 0/32]	Sun Nov  9 22:28:43 2025	loss 0.0516 (0.0516)


100%|██████████| 32/32 [00:06<00:00,  5.24it/s]


Training: [32/32]	Sun Nov  9 22:28:49 2025	loss 0.0170 (0.0334)
--- VALIDATION: Epoch 18 at Sun Nov  9 22:28:49 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.75it/s]


Validation: [7/7]	Sun Nov  9 22:28:49 2025	loss 0.0218 (0.0857)
--- TRAINING: Epoch 19 at Sun Nov  9 22:28:49 2025 ---


  3%|▎         | 1/32 [00:00<00:05,  5.20it/s]

Training: [ 0/32]	Sun Nov  9 22:28:50 2025	loss 0.0314 (0.0314)


100%|██████████| 32/32 [00:06<00:00,  5.27it/s]


Training: [32/32]	Sun Nov  9 22:28:56 2025	loss 0.1467 (0.0318)
--- VALIDATION: Epoch 19 at Sun Nov  9 22:28:56 2025 ---


100%|██████████| 7/7 [00:00<00:00,  7.67it/s]


Validation: [7/7]	Sun Nov  9 22:28:56 2025	loss 0.0181 (0.1334)
