# Model Training
This notebook trains fine-tuned models on datasets of different sizes with different hyperparameters, and saves the resulting models

## Load Base Model

Load the base BERT uncased model (`bert-base-uncased`), which is used as the pretrained starting point for every fine-tuned model


In [1]:
import os
import copy
from transformers import AutoModel
import torch
from torch import nn
from torch.utils.data import DataLoader
from transformers import AutoModel
from transformers import get_linear_schedule_with_warmup
from models.ModelRetriever import get_full_classification_model, get_classification_head_model, get_adapters_model, get_lora_model
from hf_utils import save_model_to_hf
import json
import time
import pickle
from sklearn.metrics import accuracy_score, f1_score
from datasets import load_from_disk
import numpy as np


In [2]:
# Pretrained encoder that serves as the common starting point for fine-tuning.
base_model = AutoModel.from_pretrained(
    "bert-base-uncased",
    dtype=torch.float32,
)

Import necessary libraries for model training 

## Prepare data for training

Set global parameters for training and saving models

In [3]:
# Output locations for per-run metrics.
METRICS_DIR = './training/training_metrics'
TRAINER_DIR = './training/trainer_metrics'

# Seed the random number generators once, up front, so that a
# "Restart Kernel -> Run All" reproduces the same DataLoader shuffling and the
# same initial weights for any newly created layers.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Learning rate per fine-tuning method.
LR = {
    'full': 1e-5,
    'head': 1e-4,
    'adapters': 1e-4,
    'lora': 1e-4,
}
# Maximum number of epochs per dataset size (early stopping may end a run sooner).
EPOCHS = {
    'small': 5,
    'medium': 5,
    'full': 4
}
# AdamW weight decay per dataset size.
WEIGHT_DECAY = {
    'small': 0.001,
    'medium': 0.01,
    'full': 0.03
}
NUM_LABELS = 2 # Binary classification problem
MAX_LENGTH = 512 # Max length of tokens for examples, if less pad with 0, if more truncate
# Mini-batch size per dataset size.
BATCH_SIZE={
    "small": 4,
    "medium": 16,
    "full": 16
}
# Prefer the GPU when one is visible to PyTorch.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

Paths to vectorized datasets ready for models to use <br>
If empty, run Dataset_Loader.ipynb notebook

In [4]:
# One entry per dataset size. Train/validation folders are size-specific,
# while every size points at the same test folder.
DATASETS_PATHS = {
    size: {
        'train': f'./datasets/train_dataset_{size}',
        'validation': f'./datasets/validation_dataset_{size}',
        'test': './datasets/test_dataset',
    }
    for size in ('small', 'medium', 'full')
}

## Functions for training process

For performing and monitoring training processes, also contains functions for saving results

Monitor training metrics, such as time for each epoch and gpu memory usage

In [5]:
class TrainingLogger:
    """
    Collects wall-clock timing and GPU memory readings for one training run.

    epoch_times holds one {"epoch", "time_seconds"} dict per logged epoch;
    memory_log holds one {"epoch", "gpu_memory_used_mb"} dict per logged epoch
    and is only filled when CUDA is available.
    """
    def __init__(self):
        self.start_time = None
        self.total_training_time = None
        self.epoch_times = []
        self.memory_log = []

    def start_training(self):
        """Remember the moment the run started (time.time() timestamp)."""
        self.start_time = time.time()

    def end_training(self):
        """Store the seconds elapsed since start_training() was called."""
        finished_at = time.time()
        self.total_training_time = finished_at - self.start_time

    def log_epoch(self, epoch, epoch_start_time):
        """
        Record duration and (on CUDA machines) allocated GPU memory for an epoch.

        epoch is the zero-based epoch index; it is stored one-based.
        epoch_start_time is the time.time() timestamp taken when the epoch began.
        """
        epoch_number = epoch + 1
        elapsed = time.time() - epoch_start_time
        self.epoch_times.append({"epoch": epoch_number, "time_seconds": elapsed})

        if not torch.cuda.is_available():
            return

        # Memory currently allocated on device 0, converted from bytes to whole MB.
        bytes_per_mb = 1024 ** 2
        allocated_mb = torch.cuda.memory_allocated(0) // bytes_per_mb
        self.memory_log.append({"epoch": epoch_number, "gpu_memory_used_mb": int(allocated_mb)})
            

Run the training loop and record training metrics for later analysis

In [6]:
class ModelTrainer:
    """
    Fine-tuning loop with per-epoch validation, early stopping and metric logging.

    The model is moved to `device` on construction. When `train()` finishes, the
    weights from the epoch with the lowest validation loss are loaded back into
    the same model object, so a reference held by the caller also sees the best
    checkpoint rather than the weights of the last epoch.
    """

    def __init__(self, model: nn.Module, device: str = 'cpu'):
        self.model = model.to(device)
        self.device = device

    def _train_one_epoch(self, train_loader, optimizer, loss_fn, grad_clip, scheduler):
        """Run one optimisation pass over `train_loader`; return the mean batch loss."""
        self.model.train()
        running_loss = 0.0

        for batch in train_loader:
            input_ids = batch["input_ids"].to(self.device)
            attention_mask = batch["attention_mask"].to(self.device)
            labels = batch["label"].to(self.device)

            optimizer.zero_grad(set_to_none=True)
            logits = self.model(input_ids, attention_mask=attention_mask)
            loss = loss_fn(logits, labels)
            loss.backward()

            if grad_clip is not None:
                # Clip only the parameters that are actually being trained.
                torch.nn.utils.clip_grad_norm_(
                    (p for p in self.model.parameters() if p.requires_grad), grad_clip
                )

            optimizer.step()
            if scheduler is not None:
                # The scheduler advances once per batch, not once per epoch.
                scheduler.step()

            running_loss += loss.item()

        return running_loss / max(1, len(train_loader))

    def _evaluate(self, val_loader, loss_fn, num_labels):
        """Score the model on `val_loader`; return (mean loss, accuracy, weighted F1)."""
        self.model.eval()
        val_loss = 0.0
        all_preds, all_labels = [], []

        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch["input_ids"].to(self.device)
                attention_mask = batch["attention_mask"].to(self.device)
                labels = batch["label"].to(self.device)

                # NOTE(review): assumes the model returns a (batch, num_labels)
                # logits tensor directly - confirm against the model wrappers.
                logits = self.model(input_ids, attention_mask=attention_mask)
                val_loss += loss_fn(logits, labels).item()

                all_preds.extend(torch.argmax(logits, dim=1).cpu().tolist())
                all_labels.extend(labels.cpu().tolist())

        avg_val_loss = val_loss / max(1, len(val_loader))
        y_true, y_pred = np.array(all_labels), np.array(all_preds)
        val_acc = accuracy_score(y_true, y_pred)
        val_f1 = f1_score(y_true, y_pred, average="weighted", labels=list(range(num_labels)))
        return avg_val_loss, val_acc, val_f1

    def train(
        self,
        train_loader: DataLoader,
        val_loader: DataLoader,
        optimizer: torch.optim.Optimizer,
        loss_fn: nn.Module,
        epochs: int,
        save_dir: str,
        filename: str,
        num_labels: int = NUM_LABELS,
        grad_clip: float = 1.0,
        scheduler = None,
        early_stopping_patience: int = 1
    ):
        """
        Train for up to `epochs` epochs and pickle the collected metrics.

        Args:
            train_loader: batches with "input_ids", "attention_mask" and "label".
            val_loader: validation batches with the same keys.
            optimizer: optimizer already bound to the trainable parameters.
            loss_fn: callable taking (logits, labels) and returning a scalar loss.
            epochs: maximum number of epochs to run.
            save_dir: directory for the metrics pickle (created if missing).
            filename: pickle file name without the ".pkl" extension.
            num_labels: number of classes, used as the label set for the F1 score.
            grad_clip: max gradient norm; None disables clipping.
            scheduler: optional LR scheduler, stepped after every batch.
            early_stopping_patience: number of consecutive epochs without a
                validation-loss improvement after which training stops.

        Returns:
            dict with "train_loss", "val_loss", "val_accuracy" and "val_f1"
            lists, one entry per completed epoch (including the epoch on which
            early stopping triggered).
        """
        os.makedirs(save_dir, exist_ok=True)

        best_state = None
        best_val_loss = float('inf')
        patience_counter = 0

        logger = TrainingLogger()
        logger.start_training()

        history = {"train_loss": [], "val_loss": [], "val_accuracy": [], "val_f1": []}

        for epoch in range(epochs):
            epoch_start = time.time()

            avg_train_loss = self._train_one_epoch(
                train_loader, optimizer, loss_fn, grad_clip, scheduler
            )
            avg_val_loss, val_acc, val_f1 = self._evaluate(val_loader, loss_fn, num_labels)

            # Record the epoch BEFORE the early-stopping check so the epoch that
            # triggers the stop is not missing from the logs and every history
            # list keeps the same length.
            history["train_loss"].append(avg_train_loss)
            history["val_loss"].append(avg_val_loss)
            history["val_accuracy"].append(val_acc)
            history["val_f1"].append(val_f1)
            logger.log_epoch(epoch, epoch_start)

            print(
                f"Epoch {epoch+1}/{epochs} | "
                f"Train Loss: {avg_train_loss:.4f} | "
                f"Val Loss: {avg_val_loss:.4f} | "
                f"Val Acc: {val_acc:.4f} | "
                f"Val F1: {val_f1:.4f}"
            )

            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                patience_counter = 0
                # Snapshot the weights on the CPU instead of deep-copying the
                # whole module, which would keep a second model on the device.
                best_state = {
                    key: tensor.detach().cpu().clone()
                    for key, tensor in self.model.state_dict().items()
                }
            else:
                patience_counter += 1
                if patience_counter >= early_stopping_patience:
                    print(f'Early stopping triggered at epoch {epoch + 1}')
                    break

        logger.end_training()

        if best_state is not None:
            # Restore in place: the object the caller passed in (and later saves)
            # ends up with the best weights, not the last-epoch ones.
            self.model.load_state_dict(best_state)

        # persist logs
        log_blob = {
            "history": history,
            "epoch_times": logger.epoch_times,
            "gpu_memory_log": logger.memory_log,
            "total_training_time_seconds": logger.total_training_time,
        }

        file_path = os.path.join(save_dir, f"{filename}.pkl")
        with open(file_path, "wb") as f:
            pickle.dump(log_blob, f)

        print(f"Saved metrics to: {file_path}")
        return history

Start training process for each model <br>
Returns fine-tuned model

In [7]:
def Train(model, size, name, learning_rate):
    """
    Fine-tune `model` on the dataset of the given size and store the run metadata.

    Args:
        model: model to train; only parameters with requires_grad=True are optimised.
        size: dataset size key ('small', 'medium' or 'full'); selects the dataset
            paths, batch size, epoch count and weight decay.
        name: run name, used for the metrics sub-directory and file names.
        learning_rate: AdamW learning rate.

    Returns:
        The model held by the trainer once training has finished.
    """
    print(f"\n========== Training {name} ==========")
    save_dir = os.path.join(METRICS_DIR, name)
    trainer = ModelTrainer(model, device=DEVICE)

    train_ds = load_from_disk(DATASETS_PATHS[size]['train'])
    val_ds = load_from_disk(DATASETS_PATHS[size]['validation'])

    train_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
    val_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE[size], shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE[size], shuffle=False)

    # optimizer only on trainable params
    optim = torch.optim.AdamW(
        [p for p in model.parameters() if p.requires_grad],
        lr=learning_rate,
        weight_decay = WEIGHT_DECAY[size]
    )

    # The schedule is sized for the maximum number of epochs; early stopping may
    # end the run before the learning rate has decayed to zero.
    num_training_steps = len(train_loader) * EPOCHS[size]
    # 25% warmup for the small dataset, 10% for the others
    warmup_rate = 0.25 if size == 'small' else 0.1
    num_warmup_steps = int(warmup_rate * num_training_steps)

    scheduler = get_linear_schedule_with_warmup(
                    optimizer = optim,
                    num_warmup_steps = num_warmup_steps,
                    num_training_steps = num_training_steps
                )

    loss_fn = nn.CrossEntropyLoss()

    # log param counts
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params     = sum(p.numel() for p in model.parameters())

    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, "run_config.json"), "w") as f:
        json.dump({
            "model_name": name,
            "epochs": EPOCHS[size],
            "lr": learning_rate,
            "num_labels": NUM_LABELS,
            "max_length": MAX_LENGTH,
            "trainable_params": int(trainable_params),
            "total_params": int(total_params),
            "device": DEVICE,
            # Per-size hyperparameters, recorded so a run can be reproduced
            # from its config file alone.
            "batch_size": BATCH_SIZE[size],
            "weight_decay": WEIGHT_DECAY[size],
            "num_warmup_steps": num_warmup_steps,
            "num_training_steps": num_training_steps
        }, f, indent=2)

    # Train model
    trainer.train(
        train_loader=train_loader,
        val_loader=val_loader,
        optimizer=optim,
        scheduler=scheduler,
        loss_fn=loss_fn,
        epochs=EPOCHS[size],
        save_dir=save_dir,
        filename=name,
        num_labels=NUM_LABELS,
        grad_clip=1.0,
        early_stopping_patience=1
    )

    return trainer.model

## Train models

Each section below takes the base model, converts it into a model suited to a specific fine-tuning method, and trains it on the small, medium and full datasets. The trained model is then saved to the Hugging Face platform.

### Full Fine-Tuning
Run execution on small, medium and full dataset. Save results on HF platform

In [8]:
learning_rate = LR['full']

for size in ('small', 'medium', 'full'):
    name = f"full_fine_tuning_set_{size}"

    # Each run gets its own copy of the pretrained weights, so runs do not
    # influence one another.
    model = get_full_classification_model(copy.deepcopy(base_model))

    Train(model, size, name, learning_rate)
    save_model_to_hf(model, name)

print("\nAll runs completed.")


Epoch 1/5 | Train Loss: 0.7319 | Val Loss: 0.7003 | Val Acc: 0.5000 | Val F1: 0.4949
Epoch 2/5 | Train Loss: 0.6339 | Val Loss: 0.6700 | Val Acc: 0.6000 | Val F1: 0.5238
Epoch 3/5 | Train Loss: 0.4933 | Val Loss: 0.6503 | Val Acc: 0.5000 | Val F1: 0.4505
Epoch 4/5 | Train Loss: 0.3536 | Val Loss: 0.6504 | Val Acc: 0.6000 | Val F1: 0.5833
Early stopping triggered at epoch 4
Saved metrics to: ./training/training_metrics\full_fine_tuning_set_small\full_fine_tuning_set_small.pkl


pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/full_fine_tuning_set_small

Epoch 1/5 | Train Loss: 0.6721 | Val Loss: 0.5767 | Val Acc: 0.7100 | Val F1: 0.7014
Epoch 2/5 | Train Loss: 0.3792 | Val Loss: 0.3409 | Val Acc: 0.8400 | Val F1: 0.8400
Epoch 3/5 | Train Loss: 0.2242 | Val Loss: 0.3597 | Val Acc: 0.8800 | Val F1: 0.8800
Early stopping triggered at epoch 3
Saved metrics to: ./training/training_metrics\full_fine_tuning_set_medium\full_fine_tuning_set_medium.pkl


pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/full_fine_tuning_set_medium

Epoch 1/4 | Train Loss: 0.3059 | Val Loss: 0.2052 | Val Acc: 0.9216 | Val F1: 0.9215
Epoch 2/4 | Train Loss: 0.1667 | Val Loss: 0.2609 | Val Acc: 0.9252 | Val F1: 0.9252
Early stopping triggered at epoch 2
Saved metrics to: ./training/training_metrics\full_fine_tuning_set_full\full_fine_tuning_set_full.pkl


pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/full_fine_tuning_set_full

All runs completed.


### Classification head Fine-Tuning
Run execution on small, medium and full dataset. Save results on HF platform

In [9]:
learning_rate = LR['head']

for size in ('small', 'medium', 'full'):
    name = f"head_fine_tuning_set_{size}"

    # Each run gets its own copy of the pretrained weights, so runs do not
    # influence one another.
    model = get_classification_head_model(copy.deepcopy(base_model))

    Train(model, size, name, learning_rate)
    save_model_to_hf(model, name)

print("\nAll runs completed.")


Epoch 1/5 | Train Loss: 0.7803 | Val Loss: 0.6645 | Val Acc: 0.5000 | Val F1: 0.3333
Epoch 2/5 | Train Loss: 0.7360 | Val Loss: 0.6659 | Val Acc: 0.6000 | Val F1: 0.5238
Early stopping triggered at epoch 2
Saved metrics to: ./training/training_metrics\head_fine_tuning_set_small\head_fine_tuning_set_small.pkl


pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/head_fine_tuning_set_small

Epoch 1/5 | Train Loss: 0.6873 | Val Loss: 0.6875 | Val Acc: 0.5900 | Val F1: 0.5850
Epoch 2/5 | Train Loss: 0.6646 | Val Loss: 0.6702 | Val Acc: 0.6800 | Val F1: 0.6795
Epoch 3/5 | Train Loss: 0.6459 | Val Loss: 0.6587 | Val Acc: 0.6700 | Val F1: 0.6700
Epoch 4/5 | Train Loss: 0.6331 | Val Loss: 0.6526 | Val Acc: 0.6800 | Val F1: 0.6795
Epoch 5/5 | Train Loss: 0.6236 | Val Loss: 0.6506 | Val Acc: 0.6800 | Val F1: 0.6795
Saved metrics to: ./training/training_metrics\head_fine_tuning_set_medium\head_fine_tuning_set_medium.pkl


pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/head_fine_tuning_set_medium

Epoch 1/4 | Train Loss: 0.5815 | Val Loss: 0.4543 | Val Acc: 0.8228 | Val F1: 0.8228
Epoch 2/4 | Train Loss: 0.4547 | Val Loss: 0.4017 | Val Acc: 0.8420 | Val F1: 0.8420
Epoch 3/4 | Train Loss: 0.4257 | Val Loss: 0.3867 | Val Acc: 0.8432 | Val F1: 0.8432
Epoch 4/4 | Train Loss: 0.4152 | Val Loss: 0.3831 | Val Acc: 0.8452 | Val F1: 0.8452
Saved metrics to: ./training/training_metrics\head_fine_tuning_set_full\head_fine_tuning_set_full.pkl


pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/head_fine_tuning_set_full

All runs completed.


### Adapters  Fine-Tuning
Run execution on small, medium and full dataset for different inner dimension adapters size. Save results on HF platform

In [10]:
# Adapter sizes to sweep; each value is passed as `adapter_size` when the
# adapters model is built.
ADAPTERS_SIZE = [48, 96]

In [11]:
learning_rate = LR['adapters']

for adapter_size in ADAPTERS_SIZE:
    for size in ('small', 'medium', 'full'):
        name = f"adapters_inner_dim_{adapter_size}_set_{size}"

        # Fresh copy of the pretrained weights for every (adapter size, dataset) run.
        model = get_adapters_model(copy.deepcopy(base_model), adapter_size=adapter_size)

        Train(model, size, name, learning_rate)
        save_model_to_hf(model, name)

print("\nAll runs completed.")


Epoch 1/5 | Train Loss: 0.7044 | Val Loss: 0.7149 | Val Acc: 0.5000 | Val F1: 0.4949
Epoch 2/5 | Train Loss: 0.6932 | Val Loss: 0.6486 | Val Acc: 0.7000 | Val F1: 0.6970
Epoch 3/5 | Train Loss: 0.6643 | Val Loss: 0.7118 | Val Acc: 0.5000 | Val F1: 0.4505
Early stopping triggered at epoch 3
Saved metrics to: ./training/training_metrics\adapters_inner_dim_48_set_small\adapters_inner_dim_48_set_small.pkl


pytorch_model.bin:   0%|          | 0.00/448M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/adapters_inner_dim_48_set_small

Epoch 1/5 | Train Loss: 0.6939 | Val Loss: 0.6954 | Val Acc: 0.5800 | Val F1: 0.5798
Epoch 2/5 | Train Loss: 0.6592 | Val Loss: 0.7249 | Val Acc: 0.5300 | Val F1: 0.5296
Early stopping triggered at epoch 2
Saved metrics to: ./training/training_metrics\adapters_inner_dim_48_set_medium\adapters_inner_dim_48_set_medium.pkl


pytorch_model.bin:   0%|          | 0.00/448M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/adapters_inner_dim_48_set_medium

Epoch 1/4 | Train Loss: 0.5584 | Val Loss: 0.3979 | Val Acc: 0.8200 | Val F1: 0.8193
Epoch 2/4 | Train Loss: 0.3685 | Val Loss: 0.3341 | Val Acc: 0.8544 | Val F1: 0.8543
Epoch 3/4 | Train Loss: 0.3265 | Val Loss: 0.3439 | Val Acc: 0.8580 | Val F1: 0.8580
Early stopping triggered at epoch 3
Saved metrics to: ./training/training_metrics\adapters_inner_dim_48_set_full\adapters_inner_dim_48_set_full.pkl


pytorch_model.bin:   0%|          | 0.00/448M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/adapters_inner_dim_48_set_full

Epoch 1/5 | Train Loss: 0.7150 | Val Loss: 0.7616 | Val Acc: 0.5000 | Val F1: 0.4505
Epoch 2/5 | Train Loss: 0.7516 | Val Loss: 0.7428 | Val Acc: 0.4000 | Val F1: 0.2857
Epoch 3/5 | Train Loss: 0.7024 | Val Loss: 0.7536 | Val Acc: 0.5000 | Val F1: 0.4505
Early stopping triggered at epoch 3
Saved metrics to: ./training/training_metrics\adapters_inner_dim_96_set_small\adapters_inner_dim_96_set_small.pkl


pytorch_model.bin:   0%|          | 0.00/448M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/adapters_inner_dim_96_set_small

Epoch 1/5 | Train Loss: 0.6731 | Val Loss: 0.7202 | Val Acc: 0.4900 | Val F1: 0.4560
Epoch 2/5 | Train Loss: 0.6651 | Val Loss: 0.7024 | Val Acc: 0.5300 | Val F1: 0.5277
Epoch 3/5 | Train Loss: 0.6368 | Val Loss: 0.7552 | Val Acc: 0.5500 | Val F1: 0.5331
Early stopping triggered at epoch 3
Saved metrics to: ./training/training_metrics\adapters_inner_dim_96_set_medium\adapters_inner_dim_96_set_medium.pkl


pytorch_model.bin:   0%|          | 0.00/448M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/adapters_inner_dim_96_set_medium

Epoch 1/4 | Train Loss: 0.5484 | Val Loss: 0.3784 | Val Acc: 0.8312 | Val F1: 0.8310
Epoch 2/4 | Train Loss: 0.3592 | Val Loss: 0.3578 | Val Acc: 0.8472 | Val F1: 0.8465
Epoch 3/4 | Train Loss: 0.3217 | Val Loss: 0.3200 | Val Acc: 0.8612 | Val F1: 0.8612
Epoch 4/4 | Train Loss: 0.3023 | Val Loss: 0.3233 | Val Acc: 0.8608 | Val F1: 0.8608
Early stopping triggered at epoch 4
Saved metrics to: ./training/training_metrics\adapters_inner_dim_96_set_full\adapters_inner_dim_96_set_full.pkl


pytorch_model.bin:   0%|          | 0.00/448M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/adapters_inner_dim_96_set_full

All runs completed.


### LoRA  Fine-Tuning
Run execution on small, medium and full dataset for different hyperparameters. Save results on HF platform

In [12]:
# LoRA settings to sweep; "r" and "alpha" are passed as `rank` and `alpha`
# when the LoRA model is built.
LORA_CONFIGS = [
    dict(r=32, alpha=64),
    dict(r=64, alpha=32),
]

In [13]:
learning_rate = LR['lora']

for cfg in LORA_CONFIGS:
    for size in ('small', 'medium', 'full'):
        r, alpha = cfg['r'], cfg['alpha']
        name = f"lora_r_{r}_alpha_{alpha}_set_{size}"

        # Fresh copy of the pretrained weights for every (config, dataset) run.
        model = get_lora_model(copy.deepcopy(base_model), rank=r, alpha=alpha)

        Train(model, size, name, learning_rate)
        save_model_to_hf(model, name)

print("\nAll runs completed.")


Epoch 1/5 | Train Loss: 0.7005 | Val Loss: 0.7873 | Val Acc: 0.2000 | Val F1: 0.1667
Epoch 2/5 | Train Loss: 0.6549 | Val Loss: 0.7983 | Val Acc: 0.3000 | Val F1: 0.2929
Early stopping triggered at epoch 2
Saved metrics to: ./training/training_metrics\lora_r_32_alpha_64_set_small\lora_r_32_alpha_64_set_small.pkl


pytorch_model.bin:   0%|          | 0.00/443M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/lora_r_32_alpha_64_set_small

Epoch 1/5 | Train Loss: 0.6217 | Val Loss: 0.5366 | Val Acc: 0.7200 | Val F1: 0.7029
Epoch 2/5 | Train Loss: 0.3574 | Val Loss: 0.3147 | Val Acc: 0.8900 | Val F1: 0.8900
Epoch 3/5 | Train Loss: 0.2598 | Val Loss: 0.2971 | Val Acc: 0.8900 | Val F1: 0.8900
Epoch 4/5 | Train Loss: 0.2404 | Val Loss: 0.2969 | Val Acc: 0.9000 | Val F1: 0.9000
Epoch 5/5 | Train Loss: 0.2026 | Val Loss: 0.3056 | Val Acc: 0.9100 | Val F1: 0.9100
Early stopping triggered at epoch 5
Saved metrics to: ./training/training_metrics\lora_r_32_alpha_64_set_medium\lora_r_32_alpha_64_set_medium.pkl


pytorch_model.bin:   0%|          | 0.00/443M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/lora_r_32_alpha_64_set_medium

Epoch 1/4 | Train Loss: 0.3222 | Val Loss: 0.2155 | Val Acc: 0.9168 | Val F1: 0.9168
Epoch 2/4 | Train Loss: 0.1937 | Val Loss: 0.2343 | Val Acc: 0.9164 | Val F1: 0.9164
Early stopping triggered at epoch 2
Saved metrics to: ./training/training_metrics\lora_r_32_alpha_64_set_full\lora_r_32_alpha_64_set_full.pkl


pytorch_model.bin:   0%|          | 0.00/443M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/lora_r_32_alpha_64_set_full

Epoch 1/5 | Train Loss: 0.7111 | Val Loss: 0.6853 | Val Acc: 0.5000 | Val F1: 0.4949
Epoch 2/5 | Train Loss: 0.6945 | Val Loss: 0.6921 | Val Acc: 0.4000 | Val F1: 0.4000
Early stopping triggered at epoch 2
Saved metrics to: ./training/training_metrics\lora_r_64_alpha_32_set_small\lora_r_64_alpha_32_set_small.pkl


pytorch_model.bin:   0%|          | 0.00/447M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/lora_r_64_alpha_32_set_small

Epoch 1/5 | Train Loss: 0.6736 | Val Loss: 0.6174 | Val Acc: 0.7600 | Val F1: 0.7552
Epoch 2/5 | Train Loss: 0.4691 | Val Loss: 0.4110 | Val Acc: 0.8400 | Val F1: 0.8390
Epoch 3/5 | Train Loss: 0.3210 | Val Loss: 0.3358 | Val Acc: 0.8700 | Val F1: 0.8700
Epoch 4/5 | Train Loss: 0.2760 | Val Loss: 0.3236 | Val Acc: 0.8900 | Val F1: 0.8900
Epoch 5/5 | Train Loss: 0.2698 | Val Loss: 0.3221 | Val Acc: 0.8900 | Val F1: 0.8900
Saved metrics to: ./training/training_metrics\lora_r_64_alpha_32_set_medium\lora_r_64_alpha_32_set_medium.pkl


pytorch_model.bin:   0%|          | 0.00/447M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/lora_r_64_alpha_32_set_medium

Epoch 1/4 | Train Loss: 0.3344 | Val Loss: 0.2394 | Val Acc: 0.9100 | Val F1: 0.9099
Epoch 2/4 | Train Loss: 0.1992 | Val Loss: 0.2087 | Val Acc: 0.9216 | Val F1: 0.9215
Epoch 3/4 | Train Loss: 0.1723 | Val Loss: 0.2236 | Val Acc: 0.9220 | Val F1: 0.9220
Early stopping triggered at epoch 3
Saved metrics to: ./training/training_metrics\lora_r_64_alpha_32_set_full\lora_r_64_alpha_32_set_full.pkl


pytorch_model.bin:   0%|          | 0.00/447M [00:00<?, ?B/s]

Model pushed to https://huggingface.co/Jovan23/lora_r_64_alpha_32_set_full

All runs completed.
