In [1]:
import torch
from pathlib import Path
import logging
import json

from adaptive_al_v2.active_learning import ActiveLearning, ExperimentConfig

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cuda


In [3]:
cfg = ExperimentConfig(
    seed=42,
    total_rounds=5,
    experiment_name="dummy_test_pipeline",
    save_dir=Path("./experiments"),

    # Pool settings
    initial_pool_size=100,
    acquisition_batch_size=16,

    # Model
    model_class="SimpleTextClassifier",
    model_kwargs={"hidden_dim": 32, "num_classes": 4},

    # Strategy
    strategy_class="FineTuneStrategy",
    strategy_kwargs={}, # The base params are passed internally, only strategy specific params needed here

    optimizer_class = "Adam",
    optimizer_kwargs = {"lr": 1e-3, "weight_decay": 1e-4},

    criterion_class = "CrossEntropyLoss",
    criterion_kwargs = {},

    scheduler_class = "StepLR",
    scheduler_kwargs = {"step_size": 10, "gamma": 0.1},

    # Sampler
    sampler_class="RandomSampler",
    sampler_kwargs={"seed": 42},

    # Training
    device=device,
    epochs=3,
    batch_size=8,

    # Dataset names (for reference)
    data="agnews"
)

In [4]:
al = ActiveLearning(cfg)

## Just checking if it actually works

In [5]:
print(f"Initial pool stats: {al.pool.get_pool_stats()}")

Initial pool stats: {'labeled_count': 100, 'unlabeled_count': 107900, 'total_count': 108000}


In [6]:
round_stats = al.train_one_round(new_indices=None)
print(f"Round 1 completed. Val F1: {round_stats['f1_score']:.4f}, Training Time: {round_stats['training_time']:.2f}s")

Round 1 completed. Val F1: 0.1146, Training Time: 0.12s


In [7]:
new_indices = al.sample_next_batch()
print(f"Sampled {len(new_indices)} new indices: {new_indices[:5]} ...")

Sampled 16 new indices: [83887, 14608, 3283, 97289, 36085] ...


In [8]:
round_stats = al.train_one_round(new_indices=new_indices)
print(f"Round 2 completed. Val F1: {round_stats['f1_score']:.4f}, Training Time: {round_stats['training_time']:.2f}s")

Round 2 completed. Val F1: 0.0995, Training Time: 0.04s


In [9]:
num_additional_rounds = 3
for r in range(num_additional_rounds):
    print(f"\n--- Round {al.current_round + 1}")

    new_indices = al.sample_next_batch()
    if not new_indices:
        print("No more unlabeled data available!")
        break

    round_stats = al.train_one_round(new_indices=new_indices)
    print(f"Val F1: {round_stats['f1_score']:.4f}, Training Time: {round_stats['training_time']:.2f}s")
    print(f"Pool Stats: {round_stats['pool_stats']}")


--- Round 3
Val F1: 0.1111, Training Time: 0.05s
Pool Stats: {'labeled_count': 132, 'unlabeled_count': 107868, 'total_count': 108000}

--- Round 4
Val F1: 0.1270, Training Time: 0.04s
Pool Stats: {'labeled_count': 148, 'unlabeled_count': 107852, 'total_count': 108000}

--- Round 5
Val F1: 0.1140, Training Time: 0.05s
Pool Stats: {'labeled_count': 164, 'unlabeled_count': 107836, 'total_count': 108000}


In [25]:
# # Still doesnt show logs for some reason
# logging.basicConfig(
#     level=logging.INFO,
#     format="%(asctime)s | %(levelname)s | %(message)s",
#     datefmt="%H:%M:%S"
# )

# FULL PIPELINE HERE ! ! !

In [10]:
final_metrics = al.run_full_pipeline()
print(f"Final Test Metrics: F1={final_metrics['f1_score']:.4f}, Accuracy={final_metrics['accuracy']:.4f}, Loss={final_metrics['loss']:.4f}")

Final Test Metrics: F1=0.1000, Accuracy=0.2500, Loss=2.0380


In [11]:
al.save_experiment()

In [12]:
with open(r"./experiments/dummy_test_pipeline/results_20250828_213116.json", 'r') as f:
    experiment_data = json.load(f)

In [13]:
print(experiment_data.keys())

dict_keys(['cfg', 'total_rounds', 'round_val_stats', 'final_pool_stats', 'final_test_stats'])


In [14]:
experiment_data['cfg']

{'seed': 42,
 'total_rounds': 5,
 'initial_pool_size': 100,
 'acquisition_batch_size': 16,
 'sampler_class': 'RandomSampler',
 'sampler_kwargs': {'seed': 42},
 'strategy_class': 'FineTuneStrategy',
 'strategy_kwargs': {},
 'model_class': 'SimpleTextClassifier',
 'model_kwargs': {'hidden_dim': 32, 'num_classes': 4},
 'optimizer_class': 'Adam',
 'optimizer_kwargs': {'lr': 0.001},
 'criterion_class': 'CrossEntropyLoss',
 'criterion_kwargs': {},
 'scheduler_class': 'StepLR',
 'scheduler_kwargs': {'step_size': 10, 'gamma': 0.1},
 'device': 'cuda',
 'epochs': 3,
 'batch_size': 8,
 'data': 'agnews',
 'save_dir': 'experiments',
 'experiment_name': 'dummy_test_pipeline'}

In [15]:
experiment_data['total_rounds']

5

In [16]:
experiment_data['round_val_stats'][-1]

{'training_time': 0.05011439323425293,
 'avg_loss': 11.27958299243261,
 'epochs': 3,
 'total_samples': 164,
 'new_samples': 16,
 'loss': 2.7253062326510746,
 'f1_score': 0.2082740227777865,
 'accuracy': 0.28041666666666665,
 'pool_stats': {'labeled_count': 164,
  'unlabeled_count': 107836,
  'total_count': 108000}}

In [17]:
experiment_data["final_pool_stats"]

{'labeled_count': 164, 'unlabeled_count': 107836, 'total_count': 108000}

In [18]:
experiment_data["final_test_stats"]

{'loss': 2.7029831030494287,
 'f1_score': 0.20674316975453216,
 'accuracy': 0.2811842105263158}