# LLM Fine-Tuning with Encoder

## Environment & Version Checks

In [1]:
import transformers
print(transformers.__version__)

import sys, os, json, copy
sys.path.append("..")

from datasets import load_dataset
from transformers import TrainingArguments, DataCollatorWithPadding
from src.EncoderTrainer import EncoderTrainer
from peft import LoraConfig, TaskType
import numpy as np
import torch
import time



4.41.2


## Global Configuration

In [2]:
# ===== paths & constants =====
OUTPUT_FOLDER = "../outputs"
DATA_FOLDER = "../data"

# MODEL_NAME = "bert-base-uncased"
MODEL_NAME = "yiyanghkust/finbert-tone"
FOLDER_NAME = "finbert-tone"
# MODEL_NAME = "microsoft/deberta-v3-base"

VAL_JSON  = f"{DATA_FOLDER}/processed/val.json"
TEST_JSON = f"{DATA_FOLDER}/processed/test.json"

MAX_LENGTH = 128
LABELS = ["negative", "neutral", "positive"]
NUM_LABELS = len(LABELS)

LORA_TUNING_DIR = f"{OUTPUT_FOLDER}/lora_tuning/{FOLDER_NAME}"
os.makedirs(LORA_TUNING_DIR, exist_ok=True)

## Dataset Loading

In [3]:
dataset = load_dataset(
    "json",
    data_files={
        "train": f"{DATA_FOLDER}/processed/train.json",
        "validation": f"{DATA_FOLDER}/processed/val.json",
        "test": f"{DATA_FOLDER}/processed/test.json",
    }
)


In [4]:
from collections import Counter

# Extract outputs
outputs = dataset['test']['output']

# Count occurrences
label_counts = Counter(outputs)
total = sum(label_counts.values())

counts = np.array([label_counts.get(label, 0) for label in LABELS])

print("Counts:", counts)

inverse_weights = total / (len(LABELS) * np.maximum(counts, 1))

# Normalize weights
inverse_weights = inverse_weights / inverse_weights.sum()

CLASS_WEIGHTS = torch.tensor(inverse_weights, dtype=torch.float)

print("Class Weights (Inverse Normalized):", CLASS_WEIGHTS)

Counts: [ 34 143  65]
Class Weights (Inverse Normalized): tensor([0.5679, 0.1350, 0.2971])


## Prompt Formatting

In [5]:
trainer = EncoderTrainer(
    model_name=MODEL_NAME,
    labels=LABELS,
    load_in_4bit=False,
)

tokenizer = trainer.tokenizer

def format_encoder(example):
    input = example["input"]
    label = LABELS.index(example["output"])  # or map from output

    return {
        "input": input,
        "label": label,
    }

dataset = dataset.map(
    format_encoder,
    batched=False,
    num_proc=1,
    desc="Formatting prompts"
)



## Tokenization & Data Collation

In [6]:
from transformers import DataCollatorWithPadding

def tokenize_encoder(batch):
    enc = tokenizer(
        batch["input"],
        truncation=True,
        max_length=MAX_LENGTH,
    )
    enc["labels"] = int(batch["label"])
    return enc

tokenized_ds = dataset.map(
    tokenize_encoder,
    batched=False,
    remove_columns=dataset["train"].column_names,
    desc="Tokenizing encoder inputs",
)

data_collator = DataCollatorWithPadding(tokenizer)

Tokenizing encoder inputs:   0%|          | 0/242 [00:00<?, ? examples/s]

## Training Configuration

In [7]:
BASE_TRAINING_ARGS = dict(
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,   # effective batch = 8
    num_train_epochs=4,
    lr_scheduler_type="cosine", 
    weight_decay=0.01,
    # warmup_steps=100,
    warmup_ratio=0.1,

    # precision (BF16 ONLY)
    fp16=False,
    bf16=True,

    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=50,

    load_best_model_at_end=True,
    metric_for_best_model="eval_f1",
    greater_is_better=True,
    
    save_total_limit=1,
    report_to="none",
)


## Define the Experiment Function

In [8]:
RESULTS = []

# -------------------------
# Base configuration
# -------------------------
best_cfg = {
    "use_lora": True,              
    "learning_rate": 2e-4,
    "r": 8,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "use_class_weights": False,
}

def run_experiment(cfg, stage_name):
    tag = f"{stage_name}_" + "_".join([f"{k}_{v}" for k, v in cfg.items()])
    out_dir = f"{LORA_TUNING_DIR}/{tag}"
    os.makedirs(out_dir, exist_ok=True)

    print(f"\n===== {stage_name} | Running config: {cfg} =====")

    # ----------------------------------
    # Build TrainingArguments
    # ----------------------------------
    training_args = TrainingArguments(
        output_dir=out_dir,
        learning_rate=cfg["learning_rate"],
        **BASE_TRAINING_ARGS,
    )

    # ----------------------------------
    # Initialize Trainer
    # ----------------------------------
    trainer = EncoderTrainer(
        model_name=MODEL_NAME,
        labels=LABELS,
        load_in_4bit=False,
    )

    # ----------------------------------
    # Configure LoRA (if enabled)
    # ----------------------------------
    if cfg.get("use_lora", True):
        trainer.configure_lora(
            r=cfg["r"],
            lora_alpha=cfg["lora_alpha"],
            lora_dropout=cfg["lora_dropout"],
            target_modules=["query", "value"],
        )

    # ----------------------------------
    # Class weights
    # ----------------------------------
    if cfg["use_class_weights"]:
        trainer.class_weights = CLASS_WEIGHTS

    # ----- Train -----
    metrics = trainer.train(
        train_dataset=tokenized_ds["train"],
        eval_dataset=tokenized_ds["validation"],
        training_args=training_args,
        data_collator=data_collator,
        classification_eval_fn=lambda: trainer.evaluate_classification(
            test_path=VAL_JSON,
            labels=LABELS,
            verbose=False,
        )
    )

    # ----------------------------------
    # Save model (FFT + LoRA unified)
    # ----------------------------------
    trainer.save_model(out_dir)
    
    # ----------------------------------
    # Save metrics
    # ----------------------------------
    with open(os.path.join(out_dir, "metrics.json"), "w") as f:
        json.dump(metrics, f, indent=2)

    # ----------------------------------
    # Save experiment metadata (FULL INFO)
    # ----------------------------------
    experiment_metadata = {
        "model": MODEL_NAME,
        "experiment_type": stage_name,
        "learning_rate": cfg.get("learning_rate"),
        "r": cfg.get("r"),
        "lora_alpha": cfg.get("lora_alpha"),
        "lora_dropout": cfg.get("lora_dropout"),
        "use_lora": cfg.get("use_lora"),
        "use_class_weights": cfg.get("use_class_weights"),
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
    }

    with open(os.path.join(out_dir, "exp_config.json"), "w") as f:
        json.dump(experiment_metadata, f, indent=2)

    # ----------------------------------
    # Store in memory
    # ----------------------------------
    RESULTS.append({
        "stage": stage_name,
        "config": copy.deepcopy(cfg),
        "metrics": metrics,
        "output_dir": out_dir,
    })

    return metrics


## Stage 1 — Full Fine-tuning

In [9]:
full_ft_cfg = {
    "use_lora": False,      # Full FT
    "learning_rate": 2e-4,
    "r": None,
    "lora_alpha": None,
    "lora_dropout": None,
    "use_class_weights": False,
}

lr_candidates = [5e-5, 1e-4, 2e-4, 3e-4, 5e-4, 6e-4]
best_metric = -1
best_lr = None

for lr in lr_candidates:
    cfg = copy.deepcopy(full_ft_cfg)
    cfg["learning_rate"] = lr

    metrics = run_experiment(cfg, f"FFT_LR_{lr}")
    score = metrics["f1"]   # use macro F1 ideally

    if score > best_metric:
        best_metric = score
        best_lr = lr

print("Best FFT LR:", best_lr)


===== FFT_LR_5e-05 | Running config: {'use_lora': False, 'learning_rate': 5e-05, 'r': None, 'lora_alpha': None, 'lora_dropout': None, 'use_class_weights': False} =====




Epoch,Training Loss,Validation Loss
1,0.7111,0.48594
2,0.3217,0.380785
3,0.2933,0.409719
4,0.1802,0.408675



[Classification Metrics]
accuracy: 0.7975
precision: 0.8232
recall: 0.6758
f1: 0.7135
auc_ovr: 0.9258

[Classification Metrics]
accuracy: 0.8843
precision: 0.8645
recall: 0.8967
f1: 0.8793
auc_ovr: 0.9533

[Classification Metrics]
accuracy: 0.8802
precision: 0.8825
recall: 0.8753
f1: 0.8788
auc_ovr: 0.9527

[Classification Metrics]
accuracy: 0.8802
precision: 0.8825
recall: 0.8753
f1: 0.8788
auc_ovr: 0.9527

===== FFT_LR_0.0001 | Running config: {'use_lora': False, 'learning_rate': 0.0001, 'r': None, 'lora_alpha': None, 'lora_dropout': None, 'use_class_weights': False} =====




Epoch,Training Loss,Validation Loss
1,0.6571,0.627765
2,0.3306,0.502563
3,0.1175,0.562859
4,0.0273,0.559634



[Classification Metrics]
accuracy: 0.8264
precision: 0.8903
recall: 0.7811
f1: 0.8051
auc_ovr: 0.9393

[Classification Metrics]
accuracy: 0.8967
precision: 0.8920
recall: 0.8871
f1: 0.8860
auc_ovr: 0.9509

[Classification Metrics]
accuracy: 0.8884
precision: 0.8941
recall: 0.8752
f1: 0.8841
auc_ovr: 0.9529

[Classification Metrics]
accuracy: 0.8926
precision: 0.8987
recall: 0.8776
f1: 0.8873
auc_ovr: 0.9531

===== FFT_LR_0.0002 | Running config: {'use_lora': False, 'learning_rate': 0.0002, 'r': None, 'lora_alpha': None, 'lora_dropout': None, 'use_class_weights': False} =====




Epoch,Training Loss,Validation Loss
1,0.9539,0.595421
2,0.4547,0.567122
3,0.2327,0.602394
4,0.0209,0.616125



[Classification Metrics]
accuracy: 0.7810
precision: 0.5337
recall: 0.5658
f1: 0.5453
auc_ovr: 0.9347

[Classification Metrics]
accuracy: 0.8595
precision: 0.8490
recall: 0.8495
f1: 0.8478
auc_ovr: 0.9368

[Classification Metrics]
accuracy: 0.8760
precision: 0.8814
recall: 0.8516
f1: 0.8644
auc_ovr: 0.9429

[Classification Metrics]
accuracy: 0.8760
precision: 0.8821
recall: 0.8588
f1: 0.8681
auc_ovr: 0.9424

===== FFT_LR_0.0003 | Running config: {'use_lora': False, 'learning_rate': 0.0003, 'r': None, 'lora_alpha': None, 'lora_dropout': None, 'use_class_weights': False} =====




Epoch,Training Loss,Validation Loss
1,1.0406,1.761913
2,1.111,0.925223
3,1.061,0.923488
4,1.1134,0.92393



[Classification Metrics]
accuracy: 0.5909
precision: 0.1970
recall: 0.3333
f1: 0.2476
auc_ovr: 0.4682

[Classification Metrics]
accuracy: 0.5909
precision: 0.1970
recall: 0.3333
f1: 0.2476
auc_ovr: 0.5151

[Classification Metrics]
accuracy: 0.5909
precision: 0.1970
recall: 0.3333
f1: 0.2476
auc_ovr: 0.4560

[Classification Metrics]
accuracy: 0.5909
precision: 0.1970
recall: 0.3333
f1: 0.2476
auc_ovr: 0.5032

===== FFT_LR_0.0005 | Running config: {'use_lora': False, 'learning_rate': 0.0005, 'r': None, 'lora_alpha': None, 'lora_dropout': None, 'use_class_weights': False} =====




Epoch,Training Loss,Validation Loss
1,1.1691,1.002612
2,1.0136,0.93028
3,1.0331,0.922709
4,1.0522,0.924049



[Classification Metrics]
accuracy: 0.5909
precision: 0.1970
recall: 0.3333
f1: 0.2476
auc_ovr: 0.5038

[Classification Metrics]
accuracy: 0.5909
precision: 0.1970
recall: 0.3333
f1: 0.2476
auc_ovr: 0.4880

[Classification Metrics]
accuracy: 0.5909
precision: 0.1970
recall: 0.3333
f1: 0.2476
auc_ovr: 0.4677

[Classification Metrics]
accuracy: 0.5909
precision: 0.1970
recall: 0.3333
f1: 0.2476
auc_ovr: 0.4548

===== FFT_LR_0.0006 | Running config: {'use_lora': False, 'learning_rate': 0.0006, 'r': None, 'lora_alpha': None, 'lora_dropout': None, 'use_class_weights': False} =====




Epoch,Training Loss,Validation Loss
1,1.1466,1.001408
2,1.0434,0.936311
3,1.0394,0.921826
4,1.0687,0.924492



[Classification Metrics]
accuracy: 0.5909
precision: 0.1970
recall: 0.3333
f1: 0.2476
auc_ovr: 0.5183

[Classification Metrics]
accuracy: 0.5909
precision: 0.1970
recall: 0.3333
f1: 0.2476
auc_ovr: 0.4946

[Classification Metrics]
accuracy: 0.5909
precision: 0.1970
recall: 0.3333
f1: 0.2476
auc_ovr: 0.4945

[Classification Metrics]
accuracy: 0.5909
precision: 0.1970
recall: 0.3333
f1: 0.2476
auc_ovr: 0.5080
Best FFT LR: 0.0001


## Stage 2 — LoRA (Tune Learning Rate)

In [10]:
lr_candidates = [5e-5, 1e-4, 2e-4, 3e-4, 5e-4, 6e-4]
best_metric = -1

for lr in lr_candidates:
    cfg = copy.deepcopy(best_cfg)
    cfg["learning_rate"] = lr

    metrics = run_experiment(cfg, f"LORA_LR_{lr}")
    score = metrics["f1"]
    
    if score > best_metric:
        best_metric = score
        best_cfg["learning_rate"] = lr

print("Best LR:", best_cfg["learning_rate"])



===== LORA_LR_5e-05 | Running config: {'use_lora': True, 'learning_rate': 5e-05, 'r': 8, 'lora_alpha': 16, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 297,219 || all params: 110,051,334 || trainable%: 0.27007305517986724


Epoch,Training Loss,Validation Loss
1,2.6145,1.438943
2,0.9408,0.929595
3,0.954,0.891395
4,0.9894,0.890714



[Classification Metrics]
accuracy: 0.5000
precision: 0.2845
recall: 0.3199
f1: 0.2998
auc_ovr: 0.5067





[Classification Metrics]
accuracy: 0.5413
precision: 0.3754
recall: 0.3743
f1: 0.3705
auc_ovr: 0.6598





[Classification Metrics]
accuracy: 0.5785
precision: 0.3995
recall: 0.3880
f1: 0.3802
auc_ovr: 0.6800





[Classification Metrics]
accuracy: 0.5785
precision: 0.3995
recall: 0.3880
f1: 0.3802
auc_ovr: 0.6803





===== LORA_LR_0.0001 | Running config: {'use_lora': True, 'learning_rate': 0.0001, 'r': 8, 'lora_alpha': 16, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 297,219 || all params: 110,051,334 || trainable%: 0.27007305517986724


Epoch,Training Loss,Validation Loss
1,1.0858,0.873737
2,0.771,0.783714
3,0.8432,0.729628
4,0.798,0.728107



[Classification Metrics]
accuracy: 0.6240
precision: 0.3684
recall: 0.3969
f1: 0.3697
auc_ovr: 0.7020





[Classification Metrics]
accuracy: 0.6446
precision: 0.5356
recall: 0.5208
f1: 0.5244
auc_ovr: 0.7663





[Classification Metrics]
accuracy: 0.6612
precision: 0.5568
recall: 0.4776
f1: 0.4878
auc_ovr: 0.7913





[Classification Metrics]
accuracy: 0.6612
precision: 0.5568
recall: 0.4776
f1: 0.4878
auc_ovr: 0.7922





===== LORA_LR_0.0002 | Running config: {'use_lora': True, 'learning_rate': 0.0002, 'r': 8, 'lora_alpha': 16, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 297,219 || all params: 110,051,334 || trainable%: 0.27007305517986724


Epoch,Training Loss,Validation Loss
1,0.9123,0.818289
2,0.6531,0.654138
3,0.7195,0.550422
4,0.5932,0.546197



[Classification Metrics]
accuracy: 0.6488
precision: 0.3938
recall: 0.4251
f1: 0.4001
auc_ovr: 0.7717





[Classification Metrics]
accuracy: 0.7066
precision: 0.6323
recall: 0.6368
f1: 0.6342
auc_ovr: 0.8539





[Classification Metrics]
accuracy: 0.7810
precision: 0.7759
recall: 0.6904
f1: 0.7196
auc_ovr: 0.8913





[Classification Metrics]
accuracy: 0.7893
precision: 0.7928
recall: 0.7071
f1: 0.7396
auc_ovr: 0.8915





===== LORA_LR_0.0003 | Running config: {'use_lora': True, 'learning_rate': 0.0003, 'r': 8, 'lora_alpha': 16, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 297,219 || all params: 110,051,334 || trainable%: 0.27007305517986724


Epoch,Training Loss,Validation Loss
1,0.8798,0.928971
2,0.5832,0.622199
3,0.6199,0.459502
4,0.4521,0.455278



[Classification Metrics]
accuracy: 0.6777
precision: 0.4439
recall: 0.4390
f1: 0.4163
auc_ovr: 0.8017





[Classification Metrics]
accuracy: 0.7149
precision: 0.6574
recall: 0.7057
f1: 0.6735
auc_ovr: 0.8898





[Classification Metrics]
accuracy: 0.8347
precision: 0.8447
recall: 0.7661
f1: 0.7974
auc_ovr: 0.9230





[Classification Metrics]
accuracy: 0.8430
precision: 0.8470
recall: 0.7899
f1: 0.8145
auc_ovr: 0.9246





===== LORA_LR_0.0005 | Running config: {'use_lora': True, 'learning_rate': 0.0005, 'r': 8, 'lora_alpha': 16, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 297,219 || all params: 110,051,334 || trainable%: 0.27007305517986724


Epoch,Training Loss,Validation Loss
1,0.8522,0.840129
2,0.4878,0.556528
3,0.5313,0.418441
4,0.3061,0.419513



[Classification Metrics]
accuracy: 0.7066
precision: 0.4484
recall: 0.4766
f1: 0.4539
auc_ovr: 0.8364





[Classification Metrics]
accuracy: 0.7851
precision: 0.7389
recall: 0.8192
f1: 0.7650
auc_ovr: 0.9327





[Classification Metrics]
accuracy: 0.8347
precision: 0.8198
recall: 0.7900
f1: 0.8037
auc_ovr: 0.9353





[Classification Metrics]
accuracy: 0.8430
precision: 0.8379
recall: 0.7946
f1: 0.8139
auc_ovr: 0.9358





===== LORA_LR_0.0006 | Running config: {'use_lora': True, 'learning_rate': 0.0006, 'r': 8, 'lora_alpha': 16, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 297,219 || all params: 110,051,334 || trainable%: 0.27007305517986724


Epoch,Training Loss,Validation Loss
1,0.8145,0.768732
2,0.4585,0.568443
3,0.4884,0.417191
4,0.2642,0.41932



[Classification Metrics]
accuracy: 0.7066
precision: 0.4470
recall: 0.4908
f1: 0.4656
auc_ovr: 0.8523





[Classification Metrics]
accuracy: 0.7851
precision: 0.7465
recall: 0.8144
f1: 0.7675
auc_ovr: 0.9346





[Classification Metrics]
accuracy: 0.8512
precision: 0.8515
recall: 0.8208
f1: 0.8350
auc_ovr: 0.9408





[Classification Metrics]
accuracy: 0.8554
precision: 0.8544
recall: 0.8255
f1: 0.8389
auc_ovr: 0.9415




Best LR: 0.0006


## Stage 3 — LoRA (Tune Rank)

In [11]:
rank_candidates = [4, 8, 16]
best_metric = -1

for r in rank_candidates:
    cfg = copy.deepcopy(best_cfg)
    cfg["r"] = r
    cfg["lora_alpha"] = 2 * r

    metrics = run_experiment(cfg, f"LORA_RANK_{r}")
    score = metrics["f1"]
    
    if score > best_metric:
        best_metric = score
        best_cfg["r"] = r
        best_cfg["lora_alpha"] = 2 * r

print("Best Rank:", best_cfg["r"])


===== LORA_RANK_4 | Running config: {'use_lora': True, 'learning_rate': 0.0006, 'r': 4, 'lora_alpha': 8, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 149,763 || all params: 109,903,878 || trainable%: 0.13626725710261106


Epoch,Training Loss,Validation Loss
1,0.8236,0.863063
2,0.4535,0.432942
3,0.4989,0.392517
4,0.3207,0.390727



[Classification Metrics]
accuracy: 0.6942
precision: 0.4454
recall: 0.4625
f1: 0.4411
auc_ovr: 0.8401





[Classification Metrics]
accuracy: 0.8264
precision: 0.8068
recall: 0.8188
f1: 0.8125
auc_ovr: 0.9341





[Classification Metrics]
accuracy: 0.8636
precision: 0.8720
recall: 0.8398
f1: 0.8547
auc_ovr: 0.9414





[Classification Metrics]
accuracy: 0.8554
precision: 0.8687
recall: 0.8304
f1: 0.8478
auc_ovr: 0.9416





===== LORA_RANK_8 | Running config: {'use_lora': True, 'learning_rate': 0.0006, 'r': 8, 'lora_alpha': 16, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 297,219 || all params: 110,051,334 || trainable%: 0.27007305517986724


Epoch,Training Loss,Validation Loss
1,0.8145,0.768732
2,0.4585,0.568443
3,0.4884,0.417191
4,0.2642,0.41932



[Classification Metrics]
accuracy: 0.7066
precision: 0.4470
recall: 0.4908
f1: 0.4656
auc_ovr: 0.8523





[Classification Metrics]
accuracy: 0.7851
precision: 0.7465
recall: 0.8144
f1: 0.7675
auc_ovr: 0.9346





[Classification Metrics]
accuracy: 0.8512
precision: 0.8515
recall: 0.8208
f1: 0.8350
auc_ovr: 0.9408





[Classification Metrics]
accuracy: 0.8554
precision: 0.8544
recall: 0.8255
f1: 0.8389
auc_ovr: 0.9415





===== LORA_RANK_16 | Running config: {'use_lora': True, 'learning_rate': 0.0006, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 592,131 || all params: 110,346,246 || trainable%: 0.5366118209404241


Epoch,Training Loss,Validation Loss
1,0.7941,0.776848
2,0.4018,0.494633
3,0.4295,0.377814
4,0.1942,0.377262



[Classification Metrics]
accuracy: 0.7397
precision: 0.7871
recall: 0.5431
f1: 0.5726
auc_ovr: 0.8896





[Classification Metrics]
accuracy: 0.8140
precision: 0.7667
recall: 0.8475
f1: 0.7952
auc_ovr: 0.9479





[Classification Metrics]
accuracy: 0.8719
precision: 0.8958
recall: 0.8301
f1: 0.8583
auc_ovr: 0.9501





[Classification Metrics]
accuracy: 0.8636
precision: 0.8752
recall: 0.8278
f1: 0.8490
auc_ovr: 0.9509




Best Rank: 16


## Stage 4 — LoRA (Tune Alpha)

In [12]:
alpha_candidates = [
    best_cfg["r"],
    2 * best_cfg["r"],
    4 * best_cfg["r"]
]

best_metric = -1

for alpha in alpha_candidates:
    cfg = copy.deepcopy(best_cfg)
    cfg["lora_alpha"] = alpha

    metrics = run_experiment(cfg, f"LORA_ALPHA_{alpha}")
    score = metrics["f1"]
    
    if score > best_metric:
        best_metric = score
        best_cfg["lora_alpha"] = alpha

print("Best Alpha:", best_cfg["lora_alpha"])



===== LORA_ALPHA_16 | Running config: {'use_lora': True, 'learning_rate': 0.0006, 'r': 16, 'lora_alpha': 16, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 592,131 || all params: 110,346,246 || trainable%: 0.5366118209404241


Epoch,Training Loss,Validation Loss
1,0.8212,0.835114
2,0.4517,0.506681
3,0.4922,0.410727
4,0.2589,0.413398



[Classification Metrics]
accuracy: 0.7025
precision: 0.4424
recall: 0.4908
f1: 0.4643
auc_ovr: 0.8508





[Classification Metrics]
accuracy: 0.8140
precision: 0.7822
recall: 0.8307
f1: 0.7994
auc_ovr: 0.9360





[Classification Metrics]
accuracy: 0.8512
precision: 0.8573
recall: 0.8161
f1: 0.8345
auc_ovr: 0.9419





[Classification Metrics]
accuracy: 0.8512
precision: 0.8573
recall: 0.8161
f1: 0.8345
auc_ovr: 0.9421





===== LORA_ALPHA_32 | Running config: {'use_lora': True, 'learning_rate': 0.0006, 'r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 592,131 || all params: 110,346,246 || trainable%: 0.5366118209404241


Epoch,Training Loss,Validation Loss
1,0.7941,0.776848
2,0.4018,0.494633
3,0.4295,0.377814
4,0.1942,0.377262



[Classification Metrics]
accuracy: 0.7397
precision: 0.7871
recall: 0.5431
f1: 0.5726
auc_ovr: 0.8896





[Classification Metrics]
accuracy: 0.8140
precision: 0.7667
recall: 0.8475
f1: 0.7952
auc_ovr: 0.9479





[Classification Metrics]
accuracy: 0.8719
precision: 0.8958
recall: 0.8301
f1: 0.8583
auc_ovr: 0.9501





[Classification Metrics]
accuracy: 0.8636
precision: 0.8752
recall: 0.8278
f1: 0.8490
auc_ovr: 0.9509





===== LORA_ALPHA_64 | Running config: {'use_lora': True, 'learning_rate': 0.0006, 'r': 16, 'lora_alpha': 64, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 592,131 || all params: 110,346,246 || trainable%: 0.5366118209404241


Epoch,Training Loss,Validation Loss
1,0.8085,0.619942
2,0.4289,0.507394
3,0.3778,0.381115
4,0.1628,0.398593



[Classification Metrics]
accuracy: 0.7769
precision: 0.8498
recall: 0.6454
f1: 0.6979
auc_ovr: 0.9274





[Classification Metrics]
accuracy: 0.8140
precision: 0.7755
recall: 0.8713
f1: 0.8062
auc_ovr: 0.9556





[Classification Metrics]
accuracy: 0.8884
precision: 0.8916
recall: 0.8680
f1: 0.8792
auc_ovr: 0.9527





[Classification Metrics]
accuracy: 0.8802
precision: 0.8772
recall: 0.8610
f1: 0.8687
auc_ovr: 0.9522




Best Alpha: 64


## Stage 5 — LoRA (Tune Dropout)

In [13]:
dropout_candidates = [0.0, 0.05, 0.1]
best_metric = -1

for d in dropout_candidates:
    cfg = copy.deepcopy(best_cfg)
    cfg["lora_dropout"] = d

    metrics = run_experiment(cfg, f"LORA_DROPOUT_{d}")
    score = metrics["f1"]
    
    if score > best_metric:
        best_metric = score
        best_cfg["lora_dropout"] = d

print("Best Dropout:", best_cfg["lora_dropout"])



===== LORA_DROPOUT_0.0 | Running config: {'use_lora': True, 'learning_rate': 0.0006, 'r': 16, 'lora_alpha': 64, 'lora_dropout': 0.0, 'use_class_weights': False} =====




trainable params: 592,131 || all params: 110,346,246 || trainable%: 0.5366118209404241


Epoch,Training Loss,Validation Loss
1,0.7863,0.504665
2,0.3956,0.684081
3,0.3487,0.43326
4,0.166,0.437228



[Classification Metrics]
accuracy: 0.8388
precision: 0.8961
recall: 0.7807
f1: 0.8165
auc_ovr: 0.9336





[Classification Metrics]
accuracy: 0.7562
precision: 0.7139
recall: 0.8244
f1: 0.7389
auc_ovr: 0.9451





[Classification Metrics]
accuracy: 0.8595
precision: 0.8735
recall: 0.8399
f1: 0.8550
auc_ovr: 0.9507





[Classification Metrics]
accuracy: 0.8760
precision: 0.8836
recall: 0.8491
f1: 0.8650
auc_ovr: 0.9514





===== LORA_DROPOUT_0.05 | Running config: {'use_lora': True, 'learning_rate': 0.0006, 'r': 16, 'lora_alpha': 64, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 592,131 || all params: 110,346,246 || trainable%: 0.5366118209404241


Epoch,Training Loss,Validation Loss
1,0.8085,0.619942
2,0.4289,0.507394
3,0.3778,0.381115
4,0.1628,0.398593



[Classification Metrics]
accuracy: 0.7769
precision: 0.8498
recall: 0.6454
f1: 0.6979
auc_ovr: 0.9274





[Classification Metrics]
accuracy: 0.8140
precision: 0.7755
recall: 0.8713
f1: 0.8062
auc_ovr: 0.9556





[Classification Metrics]
accuracy: 0.8884
precision: 0.8916
recall: 0.8680
f1: 0.8792
auc_ovr: 0.9527





[Classification Metrics]
accuracy: 0.8802
precision: 0.8772
recall: 0.8610
f1: 0.8687
auc_ovr: 0.9522





===== LORA_DROPOUT_0.1 | Running config: {'use_lora': True, 'learning_rate': 0.0006, 'r': 16, 'lora_alpha': 64, 'lora_dropout': 0.1, 'use_class_weights': False} =====




trainable params: 592,131 || all params: 110,346,246 || trainable%: 0.5366118209404241


Epoch,Training Loss,Validation Loss
1,0.8013,0.569932
2,0.3894,0.578726
3,0.3955,0.379449
4,0.1482,0.388366



[Classification Metrics]
accuracy: 0.8058
precision: 0.8428
recall: 0.7286
f1: 0.7657
auc_ovr: 0.9232





[Classification Metrics]
accuracy: 0.7975
precision: 0.7736
recall: 0.8429
f1: 0.7939
auc_ovr: 0.9508





[Classification Metrics]
accuracy: 0.8719
precision: 0.8772
recall: 0.8540
f1: 0.8650
auc_ovr: 0.9543





[Classification Metrics]
accuracy: 0.8843
precision: 0.8888
recall: 0.8633
f1: 0.8753
auc_ovr: 0.9545




Best Dropout: 0.05


## Stage 6 — LoRA (Class Weights)

In [14]:
for use_weights in [False, True]:
    cfg = copy.deepcopy(best_cfg)
    cfg["use_class_weights"] = use_weights

    metrics = run_experiment(cfg, f"LORA_CLASS_WEIGHT")
    score = metrics["f1"]

    if score > best_metric:
        best_metric = score
        best_cfg["use_class_weights"] = use_weights

print("Best use_class_weights:", best_cfg["use_class_weights"])



===== LORA_CLASS_WEIGHT | Running config: {'use_lora': True, 'learning_rate': 0.0006, 'r': 16, 'lora_alpha': 64, 'lora_dropout': 0.05, 'use_class_weights': False} =====




trainable params: 592,131 || all params: 110,346,246 || trainable%: 0.5366118209404241


Epoch,Training Loss,Validation Loss
1,0.8085,0.619942
2,0.4289,0.507394
3,0.3778,0.381115
4,0.1628,0.398593



[Classification Metrics]
accuracy: 0.7769
precision: 0.8498
recall: 0.6454
f1: 0.6979
auc_ovr: 0.9274





[Classification Metrics]
accuracy: 0.8140
precision: 0.7755
recall: 0.8713
f1: 0.8062
auc_ovr: 0.9556





[Classification Metrics]
accuracy: 0.8884
precision: 0.8916
recall: 0.8680
f1: 0.8792
auc_ovr: 0.9527





[Classification Metrics]
accuracy: 0.8802
precision: 0.8772
recall: 0.8610
f1: 0.8687
auc_ovr: 0.9522





===== LORA_CLASS_WEIGHT | Running config: {'use_lora': True, 'learning_rate': 0.0006, 'r': 16, 'lora_alpha': 64, 'lora_dropout': 0.05, 'use_class_weights': True} =====




trainable params: 592,131 || all params: 110,346,246 || trainable%: 0.5366118209404241


Epoch,Training Loss,Validation Loss
1,1.0035,0.51285
2,0.495,0.469856
3,0.4656,0.443899
4,0.2189,0.439021



[Classification Metrics]
accuracy: 0.7686
precision: 0.7661
recall: 0.7694
f1: 0.7629
auc_ovr: 0.9192





[Classification Metrics]
accuracy: 0.7603
precision: 0.7234
recall: 0.8267
f1: 0.7494
auc_ovr: 0.9480





[Classification Metrics]
accuracy: 0.8554
precision: 0.8647
recall: 0.8255
f1: 0.8432
auc_ovr: 0.9494





[Classification Metrics]
accuracy: 0.8554
precision: 0.8557
recall: 0.8351
f1: 0.8449
auc_ovr: 0.9504


'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 5543a3d9-4ad0-487e-9b45-fb77cba3fe1d)')' thrown while requesting HEAD https://huggingface.co/yiyanghkust/finbert-tone/resolve/main/config.json
Retrying in 1s [Retry 1/5].


Best use_class_weights: False


In [15]:
print("\n===== FINAL BEST CONFIG =====")
print(best_cfg)



===== FINAL BEST CONFIG =====
{'use_lora': True, 'learning_rate': 0.0006, 'r': 16, 'lora_alpha': 64, 'lora_dropout': 0.05, 'use_class_weights': False}
