  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Pick the compute device once; the config cell below reads `device`.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

Using device: cuda


In [3]:
# Single configuration object for the whole run; it is handed to ActiveLearning below.
cfg = ExperimentConfig(
    # --- Run identity / bookkeeping ---
    experiment_name="dummy_test_pipeline",
    save_dir=Path("./experiments"),
    seed=42,
    total_rounds=5,

    # --- Data and pool sizes ---
    data="agnews",  # dataset name (for reference)
    initial_pool_size=200,
    acquisition_batch_size=256,

    # --- Model and tokenizer ---
    model_name_or_path="distilbert-base-uncased",
    # TODO: consider inferring num_labels from the dataset instead of hard-coding it.
    num_labels=4,
    tokenizer_kwargs={
        "max_length": 128,
        "padding": "max_length",
        "truncation": True,
        "add_special_tokens": True,
        "return_tensors": "pt",
    },

    # --- Optimisation ---
    optimizer_class="Adam",
    optimizer_kwargs={"lr": 1e-3, "weight_decay": 1e-4},
    criterion_class="CrossEntropyLoss",
    criterion_kwargs={},
    scheduler_class="StepLR",
    scheduler_kwargs={"step_size": 10, "gamma": 0.1},

    # --- Strategy ---
    # Only the strategy-specific parameters go here; the base parameters are
    # passed internally.
    strategy_class="DeltaF1Strategy",
    strategy_kwargs={"epsilon": 0.01, "k": 2},

    # --- Sampler ---
    # Alternative kept for quick switching:
    #   sampler_class="EntropySampler", sampler_kwargs={"show_progress": True}
    sampler_class="RandomSampler",
    sampler_kwargs={"seed": 42},

    # --- Training loop ---
    device=device,
    epochs=3,
    batch_size=64,
)

In [4]:
# Instantiate ActiveLearning from the experiment config; every later cell uses `al`.
# NOTE(review): the recorded output of this cell ends in
# `ValueError: not enough values to unpack (expected 3, got 2)` with no traceback,
# so the failing unpack could not be located from this notebook alone.
al = ActiveLearning(cfg)

INFO:root:Loading tokenizer and model from 'distilbert-base-uncased'...
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ValueError: not enough values to unpack (expected 3, got 2)

## Sanity check: stepping through the rounds manually

In [None]:
# Show the pool statistics before the first train_one_round call below.
print(f"Initial pool stats: {al.pool.get_pool_stats()}")

In [None]:
# Round 1: no new indices are supplied (new_indices=None) — presumably this trains
# on the initial pool only; confirm in train_one_round.
# The returned stats are read via their 'f1_score' and 'training_time' keys.
round_stats = al.train_one_round(new_indices=None)
print(f"Round 1 completed. Val F1: {round_stats['f1_score']:.4f}, Training Time: {round_stats['training_time']:.2f}s")

In [None]:
# Ask for the next batch of indices, then print how many came back and the first five.
new_indices = al.sample_next_batch()
print(f"Sampled {len(new_indices)} new indices: {new_indices[:5]} ...")

In [None]:
# Round 2: pass the indices sampled in the previous cell to train_one_round and
# report the same two entries ('f1_score', 'training_time') of the returned stats.
round_stats = al.train_one_round(new_indices=new_indices)
print(f"Round 2 completed. Val F1: {round_stats['f1_score']:.4f}, Training Time: {round_stats['training_time']:.2f}s")

In [None]:
# Run a few more sample -> train rounds, stopping early once sampling returns nothing.
num_additional_rounds = 3
for r in range(num_additional_rounds):
    print(f"\n--- Round {al.current_round + 1}")

    new_indices = al.sample_next_batch()
    # Explicit emptiness check: the return type of sample_next_batch is not visible
    # here, and `not new_indices` raises for a multi-element NumPy array or tensor.
    # `len(...) == 0` works for lists and array-likes alike; None is also treated
    # as "nothing left", matching the original falsy test.
    if new_indices is None or len(new_indices) == 0:
        print("No more unlabeled data available!")
        break

    round_stats = al.train_one_round(new_indices=new_indices)
    print(f"Val F1: {round_stats['f1_score']:.4f}, Training Time: {round_stats['training_time']:.2f}s")
    print(f"Pool Stats: {round_stats['pool_stats']}")

# Full pipeline run

In [None]:
# Run the whole pipeline in a single call and print the 'f1_score', 'accuracy'
# and 'loss' entries of the returned metrics.
final_metrics = al.run_full_pipeline()
print(f"Final Test Metrics: F1={final_metrics['f1_score']:.4f}, Accuracy={final_metrics['accuracy']:.4f}, Loss={final_metrics['loss']:.4f}")

In [None]:
# Persist the experiment.
# NOTE(review): presumably this writes the results_<timestamp>.json file that is
# loaded further down — confirm the output location in save_experiment.
al.save_experiment()

In [None]:
# Load a previously saved results file for inspection.
# NOTE(review): the filename is hard-coded and contains what appears to be a run
# timestamp; update it after saving a new run.
# The encoding is given explicitly so the read does not depend on the platform's
# locale default.
with open(r"experiments/dummy_test_pipeline/results_20250829_171203.json", 'r', encoding="utf-8") as f:
    experiment_data = json.load(f)

In [None]:
# List the top-level keys of the loaded results to see what was saved.
print(experiment_data.keys())

In [None]:
# Display the 'cfg' entry of the loaded results (presumably the serialized
# ExperimentConfig — confirm against save_experiment).
experiment_data['cfg']

In [None]:
# Display the 'total_rounds' entry of the loaded results.
experiment_data['total_rounds']

In [None]:
# Display the last element of 'round_val_stats' (presumably the validation stats
# of the most recent round — confirm the ordering in save_experiment).
experiment_data['round_val_stats'][-1]

In [None]:
# Display the 'final_pool_stats' entry of the loaded results.
experiment_data["final_pool_stats"]

In [None]:
# Display the 'final_test_stats' entry of the loaded results.
experiment_data["final_test_stats"]