# In [None]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Pool-based active learning demo with uncertainty sampling.
#
# A logistic-regression model is trained on a small labeled seed set. In each
# round the pool samples whose predicted class-1 probability is closest to 0.5
# are "labeled" (their true labels are revealed), moved into the training set,
# and the model is refit and scored on a held-out test set.

# Step 1: Create data
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, random_state=42)

# Split into a labeled seed set, a held-out test set and an unlabeled pool.
# The three slices are disjoint so the reported accuracy is never computed on
# samples the model has been (or may later be) trained on.
n_initial = 20   # size of the initial labeled seed set
n_test = 200     # size of the held-out evaluation set
n_rounds = 5     # number of active learning rounds
n_queries = 10   # samples labeled per round
test_end = n_initial + n_test
X_labeled, y_labeled = X[:n_initial], y[:n_initial]
X_test, y_test = X[n_initial:test_end], y[n_initial:test_end]
X_unlabeled, y_unlabeled = X[test_end:], y[test_end:]

# Step 2: Active Learning Loop
model = LogisticRegression()
model.fit(X_labeled, y_labeled)  # initial model, trained on the seed set only

for i in range(n_rounds):
    if len(X_unlabeled) == 0:
        # Pool exhausted: nothing left to query.
        break

    # Step 3: Find most uncertain samples (closest to 0.5 probability).
    # Column 1 of predict_proba is the probability of the second class; this
    # distance-to-0.5 measure is only meaningful for a binary problem.
    probs = model.predict_proba(X_unlabeled)
    uncertainty = np.abs(probs[:, 1] - 0.5)
    query_idx = np.argsort(uncertainty)[:n_queries]  # most uncertain first

    # Step 4: "Label" them (simulate by using true labels)
    X_new, y_new = X_unlabeled[query_idx], y_unlabeled[query_idx]

    # Add them to labeled set
    X_labeled = np.concatenate([X_labeled, X_new])
    y_labeled = np.concatenate([y_labeled, y_new])

    # Remove from unlabeled pool
    mask = np.ones(len(X_unlabeled), dtype=bool)
    mask[query_idx] = False
    X_unlabeled, y_unlabeled = X_unlabeled[mask], y_unlabeled[mask]

    # Refit on the enlarged labeled set so that the accuracy printed below
    # belongs to a model trained on exactly len(X_labeled) samples.
    model.fit(X_labeled, y_labeled)

    # Evaluate model on the held-out test set
    acc = accuracy_score(y_test, model.predict(X_test))
    print(f"Round {i+1}: Labeled {len(X_labeled)} samples, Accuracy = {acc:.3f}")

print("✅ Active learning completed.")
