In [1]:
import optuna
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer
from sklearn.metrics import f1_score
import numpy as np
import os
import torch
import json
import data_loader
import tqdm.auto as tqdm
with open("../../config.json", "r") as f:
    config = json.load(f)



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def compute_metrics(eval_pred):
    """
    Compute macro-F1 score for model evaluation.

    Args:
        eval_pred (tuple): Tuple of (logits, true labels).

    Returns:
        dict: Dictionary with macro-F1 score.
    """
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=1)
    return {
        "macro_f1": f1_score(labels, preds, average="macro")
    }

In [3]:
def model_init(model_name, num_labels):
    """
    Initialize a model with a given architecture and number of labels.

    Args:
        model_name (str): HuggingFace model identifier.
        num_labels (int): Number of output classes.

    Returns:
        function: A function that returns a new model instance.
    """
    return lambda: AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

In [4]:
def search_best_hparams(train_dataset, val_dataset, model_name="distilbert-base-uncased",
                        num_labels=2, n_trials=50, output_dir="optuna_search"):
    """
    Perform hyperparameter tuning using Optuna.

    Args:
        train_dataset (Dataset): HuggingFace Dataset for training.
        val_dataset (Dataset): HuggingFace Dataset for validation.
        model_name (str): Pretrained model name (default: DistilBERT).
        num_labels (int): Number of classification labels.
        n_trials (int): Number of Optuna trials.
        output_dir (str): Folder to save training outputs.

    Returns:
        dict: Best hyperparameters found by Optuna.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    def preprocess(examples):
        """Tokenize the input text."""
        return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=256)


    tokenized_train = train_dataset.map(preprocess, batched=True)
    tokenized_val = val_dataset.map(preprocess, batched=True)

    def objective(trial):
        """
        Objective function for Optuna optimization.

        Defines the search space and returns the evaluation metric to maximize.
        """
        import random
        def set_seed(seed=42):
            random.seed(seed)
            np.random.seed(seed)
            torch.manual_seed(seed)
            if torch.cuda.is_available():
                torch.cuda.manual_seed_all(seed)

        set_seed(42)


        args = TrainingArguments(
            output_dir=os.path.join(output_dir, f"trial_{trial.number}"),
            save_strategy="no",
            learning_rate=trial.suggest_float("learning_rate", 1e-5, 5e-5, log=True),
            per_device_train_batch_size=trial.suggest_categorical("batch_size", [8, 16, 32]),
            num_train_epochs=trial.suggest_int("epochs", 2, 5),
            logging_steps=10,
            eval_steps=50,
            disable_tqdm=True,
            report_to="none",
            weight_decay=trial.suggest_float("weight_decay", 0.0, 0.1),
            warmup_steps=trial.suggest_int("warmup_steps", 0, 500),
        )


        trainer = Trainer(
            model_init=model_init(model_name, num_labels),
            args=args,
            train_dataset=tokenized_train,
            eval_dataset=tokenized_val,
            compute_metrics=compute_metrics,
        )

        trainer.train()
        metrics = trainer.evaluate()
        return metrics["eval_macro_f1"]

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=n_trials,show_progress_bar=True,timeout=1800)
    optuna.visualization.plot_optimization_history(study)
    optuna.visualization.plot_param_importances(study)

    return study.best_params


## chooing hyperparameters

In [5]:
initial_pool_size = config["initial_pool_size"]

### Agnews

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load all data
train_dataset, val_dataset, test_dataset = data_loader.load_agnews()


# Sample initial pool stratified by label
initial_pool, _ = train_test_split(
    train_dataset,
    train_size=initial_pool_size,
    stratify=train_dataset["label"],
    random_state=42
)

print(initial_pool.shape)
print(initial_pool["label"].value_counts())


(200, 2)
1    50
0    50
2    50
3    50
Name: label, dtype: int64


In [7]:
# Run hyperparameter search
from datasets import Dataset
train_dataset = Dataset.from_pandas(initial_pool)
val_dataset = Dataset.from_pandas(val_dataset)
val_dataset=val_dataset.shuffle(seed=42).select(range(500))

In [8]:
agnws = search_best_hparams(train_dataset, val_dataset, model_name="distilbert-base-uncased",num_labels=4 )

# Save the best parameters
# import json, os
# os.makedirs("optuna_search", exist_ok=True)
# with open(os.path.join("optuna_search", "best_params_agnews.json"), "w") as f:
#     json.dump(agnws, f, indent=2)


Map: 100%|██████████| 200/200 [00:00<00:00, 3376.25 examples/s]
Map: 100%|██████████| 500/500 [00:00<00:00, 6676.89 examples/s]
[I 2025-06-22 21:26:48,161] A new study created in memory with name: no-name-1818dafa-af2f-45d7-83d3-315a54077205
  0%|          | 0/50 [00:00<?, ?it/s]Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 1.3791, 'grad_norm': 2.0598487854003906, 'learning_rate': 3.161253558552717e-07, 'epoch': 0.4}
{'loss': 1.3849, 'grad_norm': 2.109316349029541, 'learning_rate': 6.673757512500181e-07, 'epoch': 0.8}
{'loss': 1.3747, 'grad_norm': 1.9343794584274292, 'learning_rate': 1.0186261466447645e-06, 'epoch': 1.2}
{'loss': 1.3758, 'grad_norm': 1.8497302532196045, 'learning_rate': 1.369876542039511e-06, 'epoch': 1.6}
{'loss': 1.3784, 'grad_norm': 2.253995180130005, 'learning_rate': 1.7211269374342572e-06, 'epoch': 2.0}
{'loss': 1.3583, 'grad_norm': 2.4570188522338867, 'learning_rate': 2.0723773328290036e-06, 'epoch': 2.4}
{'loss': 1.3645, 'grad_norm': 3.0594639778137207, 'learning_rate': 2.42362772822375e-06, 'epoch': 2.8}
{'train_runtime': 439.3293, 'train_samples_per_second': 1.366, 'train_steps_per_second': 0.171, 'train_loss': 1.3728361320495606, 'epoch': 3.0}


Best trial: 0. Best value: 0.357035:   2%|▏         | 1/50 [08:20<6:48:58, 500.80s/it, 500.79/1800 seconds]

{'eval_loss': 1.3529845476150513, 'eval_macro_f1': 0.35703489617157, 'eval_runtime': 59.6277, 'eval_samples_per_second': 8.385, 'eval_steps_per_second': 1.057, 'epoch': 3.0}
[I 2025-06-22 21:35:08,949] Trial 0 finished with value: 0.35703489617157 and parameters: {'learning_rate': 1.015113642690817e-05, 'batch_size': 8, 'epochs': 3, 'weight_decay': 0.0647265149192234, 'warmup_steps': 289}. Best is trial 0 with value: 0.35703489617157.


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 1.3845, 'grad_norm': 1.3488456010818481, 'learning_rate': 2.977488257640279e-07, 'epoch': 1.4285714285714286}
{'loss': 1.3833, 'grad_norm': 1.2603083848953247, 'learning_rate': 6.285808543907256e-07, 'epoch': 2.857142857142857}
{'train_runtime': 365.3342, 'train_samples_per_second': 1.642, 'train_steps_per_second': 0.057, 'train_loss': 1.3826742001942225, 'epoch': 3.0}


Best trial: 0. Best value: 0.357035:   4%|▍         | 2/50 [15:38<6:11:06, 463.89s/it, 938.85/1800 seconds]

{'eval_loss': 1.3814243078231812, 'eval_macro_f1': 0.12146293962653223, 'eval_runtime': 71.4177, 'eval_samples_per_second': 7.001, 'eval_steps_per_second': 0.882, 'epoch': 3.0}
[I 2025-06-22 21:42:27,000] Trial 1 finished with value: 0.12146293962653223 and parameters: {'learning_rate': 1.544985573686678e-05, 'batch_size': 32, 'epochs': 3, 'weight_decay': 0.06251004940582387, 'warmup_steps': 467}. Best is trial 0 with value: 0.35703489617157.


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'loss': 1.3789, 'grad_norm': 2.059345006942749, 'learning_rate': 7.59813038259011e-07, 'epoch': 0.4}
{'loss': 1.3839, 'grad_norm': 2.111571788787842, 'learning_rate': 1.6040497474356898e-06, 'epoch': 0.8}


Best trial: 0. Best value: 0.357035:   4%|▍         | 2/50 [17:44<7:05:57, 532.45s/it, 938.85/1800 seconds]


[W 2025-06-22 21:44:33,033] Trial 2 failed with parameters: {'learning_rate': 1.6462615828945238e-05, 'batch_size': 8, 'epochs': 2, 'weight_decay': 0.03999596550792106, 'warmup_steps': 195} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "C:\Users\Nagham Omar\PycharmProjects\deltaf1-hybrid-active-learning\.venv\lib\site-packages\optuna\study\_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\Nagham Omar\AppData\Local\Temp\ipykernel_4328\378710210.py", line 67, in objective
    trainer.train()
  File "C:\Users\Nagham Omar\PycharmProjects\deltaf1-hybrid-active-learning\.venv\lib\site-packages\transformers\trainer.py", line 2240, in train
    return inner_training_loop(
  File "C:\Users\Nagham Omar\PycharmProjects\deltaf1-hybrid-active-learning\.venv\lib\site-packages\transformers\trainer.py", line 2555, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
  Fi

KeyboardInterrupt: 