# Do You Need a Human in the Loop?

## Active learning workflow using PyTorch and modAL

In [7]:
import numpy as np
import torch
from modAL.models import ActiveLearner
from modAL.uncertainty import uncertainty_sampling
from torch.utils.data import Dataset, DataLoader

In [8]:
# Define a PyTorch Dataset
class CustomDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    ``X`` and ``y`` are stored exactly as given (no copy, no conversion) and
    are indexed in parallel, so item ``i`` is ``(X[i], y[i])``.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        # The feature container alone determines the reported size.
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        features = self.X[idx]
        label = self.y[idx]
        return features, label

# Define a PyTorch model
class SimpleNN(torch.nn.Module):
    """Feed-forward classifier with a single hidden ReLU layer.

    Parameters
    ----------
    input_dim : int
        Number of input features per sample.
    hidden_dim : int, default 50
        Width of the hidden layer.
    num_classes : int, default 2
        Number of output logits per sample.
    """

    def __init__(self, input_dim, hidden_dim=50, num_classes=2):
        super().__init__()
        # Attribute names are part of the state-dict keys; keep them stable.
        self.fc1 = torch.nn.Linear(input_dim, hidden_dim)
        self.relu = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return raw (un-normalised) class logits for the batch ``x``."""
        x = self.relu(self.fc1(x))
        return self.fc2(x)

In [9]:
# PyTorch model wrapper
class PyTorchClassifier:
    """Minimal scikit-learn-style adapter around a PyTorch classifier.

    Exposes ``fit`` / ``predict`` / ``predict_proba`` so the wrapped network
    can be handed to code that expects that estimator interface.

    Parameters
    ----------
    model : torch.nn.Module
        Network that maps a float batch to per-class logits along dim 1.
    criterion : callable
        Loss invoked as ``criterion(logits, targets)``.
    optimizer : torch.optim.Optimizer
        Optimizer stepped once per mini-batch; expected to hold ``model``'s
        parameters.
    """

    def __init__(self, model, criterion, optimizer):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer

    def _model_device(self):
        """Return the device of the model's first parameter (CPU if it has none)."""
        first_param = next(self.model.parameters(), None)
        return torch.device("cpu") if first_param is None else first_param.device

    def _as_input(self, X):
        """Convert ``X`` to a float32 tensor on the model's device."""
        return torch.as_tensor(X, dtype=torch.float32, device=self._model_device())

    def fit(self, X, y, epochs=1, batch_size=10):
        """Train the wrapped model on ``(X, y)``.

        Training continues from the model's current weights; nothing is
        re-initialised between calls.

        Parameters
        ----------
        X : array-like
            Features; cast to float32, matching what ``predict`` does.
        y : array-like
            Targets. Integer/bool targets are cast to int64 class indices;
            floating-point targets are passed to the criterion unchanged.
        epochs : int, default 1
            Number of full passes over the data.
        batch_size : int, default 10
            Mini-batch size used by the shuffling ``DataLoader``.

        Returns
        -------
        PyTorchClassifier
            ``self``, following the scikit-learn estimator convention.
        """
        self.model.train()
        device = self._model_device()

        # Normalise dtypes up front so float64 features or int32 labels do not
        # trigger dtype-mismatch errors inside the model or the loss.
        features = torch.as_tensor(X, dtype=torch.float32)
        targets = torch.as_tensor(y)
        if not targets.is_floating_point():
            targets = targets.long()

        dataset = torch.utils.data.TensorDataset(features, targets)
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        for _ in range(epochs):
            for data, target in loader:
                data, target = data.to(device), target.to(device)
                self.optimizer.zero_grad()
                output = self.model(data)
                loss = self.criterion(output, target)
                loss.backward()
                self.optimizer.step()
        return self

    def predict(self, X):
        """Return the arg-max class index for each row of ``X`` as a NumPy array."""
        self.model.eval()
        X_tensor = self._as_input(X)
        with torch.no_grad():
            y_pred = self.model(X_tensor).argmax(dim=1)
        return y_pred.cpu().numpy()

    def predict_proba(self, X):
        """Return soft-maxed class probabilities for ``X`` as a NumPy array.

        The result has one row per sample and one column per model output.
        """
        self.model.eval()
        X_tensor = self._as_input(X)
        with torch.no_grad():
            y_pred_proba = torch.nn.functional.softmax(self.model(X_tensor), dim=1)
        return y_pred_proba.cpu().numpy()

In [10]:
# Initialize model, optimizer, and loss function
# Seed PyTorch's global RNG first so the randomly initialised weights are the
# same on every fresh kernel run.
torch.manual_seed(42)
model = SimpleNN(input_dim=20)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [11]:
# Generate synthetic data
# A seeded generator makes the data and the labelled/unlabelled split
# reproducible under Restart & Run All.
SEED = 42
N_SAMPLES = 1000
N_FEATURES = 20  # must equal the input_dim the model was built with
N_INITIAL = 100  # size of the initially labelled subset
rng = np.random.default_rng(SEED)

X_raw = rng.standard_normal((N_SAMPLES, N_FEATURES)).astype(np.float32)
# NOTE: labels are drawn independently of the features, so there is no signal
# to learn here; this data only exercises the workflow end to end.
y_raw = rng.integers(0, 2, size=N_SAMPLES).astype(np.int64)

# Initial labeled data
initial_idx = rng.choice(N_SAMPLES, size=N_INITIAL, replace=False)
X_initial = X_raw[initial_idx]
y_initial = y_raw[initial_idx]

# Remaining unlabeled data
X_pool = np.delete(X_raw, initial_idx, axis=0)
y_pool = np.delete(y_raw, initial_idx, axis=0)
assert len(X_initial) + len(X_pool) == N_SAMPLES

# Convert to PyTorch datasets
initial_dataset = CustomDataset(X_initial, y_initial)
pool_dataset = CustomDataset(X_pool, y_pool)

# Wrap the PyTorch model with the custom classifier
pytorch_classifier = PyTorchClassifier(model, criterion, optimizer)


In [12]:
# Define the active learner
# The learner starts from the labelled subset held by initial_dataset and
# selects further samples with the uncertainty-sampling strategy.
seed_features = initial_dataset.X  # initial training data
seed_labels = initial_dataset.y  # initial training labels
learner = ActiveLearner(
    estimator=pytorch_classifier,
    query_strategy=uncertainty_sampling,
    X_training=seed_features,
    y_training=seed_labels,
)

In [13]:
# Active learning loop
n_queries = 10
n_instances_per_query = 10  # labels requested from the (simulated) oracle per round

for i in range(n_queries):
    # The pool is consumed in place, so re-running this cell keeps shrinking
    # it. Stop cleanly once nothing is left instead of querying an empty pool.
    n_remaining = len(pool_dataset.X)
    if n_remaining == 0:
        print(f'Query {i}: Unlabeled pool is empty; stopping early.')
        break

    # Query for labels (never ask for more instances than the pool still holds)
    query_idx, query_instance = learner.query(
        pool_dataset.X, n_instances=min(n_instances_per_query, n_remaining)
    )
    # Look the labels up BEFORE deleting, while query_idx still refers to the
    # current pool positions.
    query_label = pool_dataset.y[query_idx]

    # Update pool (simulate labeling and removing queried instances)
    pool_dataset.X = np.delete(pool_dataset.X, query_idx, axis=0)
    pool_dataset.y = np.delete(pool_dataset.y, query_idx, axis=0)

    # Teach the model with the new data
    learner.teach(query_instance, query_label)

    # Print the progress
    print(f'Query {i}: Model was trained with {len(query_idx)} new instances.')

Query 0: Model was trained with 10 new instances.
Query 1: Model was trained with 10 new instances.
Query 2: Model was trained with 10 new instances.
Query 3: Model was trained with 10 new instances.
Query 4: Model was trained with 10 new instances.
Query 5: Model was trained with 10 new instances.
Query 6: Model was trained with 10 new instances.
Query 7: Model was trained with 10 new instances.
Query 8: Model was trained with 10 new instances.
Query 9: Model was trained with 10 new instances.
