## CPAL versus Deep Active Learning Baselines on Quadratic Regression

In this tutorial, we walk through how to run CPAL (with two-layer ReLU networks), the linear cutting-plane active learning baseline, the DeepAL baselines, and the scikit-activeml (skactiveml) baselines on a synthetic quadratic regression dataset.

In [None]:
import sys
sys.path.append('..')
from src.cpal.cpal import *
from src.baselines.skactive_baseline import *
from deepal_baseline.deepal_reg import run_active_learning
from types import SimpleNamespace
from src.baselines.evaluation import *
from src.baselines.linear_cp import *
from src.cpal.synthetic_data import *
from src.cpal.plot import *

In [None]:
# Add function to suppress printing
import contextlib

@contextlib.contextmanager
def suppress_print():
    """Silence everything written to ``sys.stdout`` inside the ``with`` body.

    Standard output is redirected to the null device for the duration of the
    block and the previous ``sys.stdout`` is restored on exit, including when
    the body raises. ``sys.stderr`` is left untouched.

    Yields:
        None. Use as ``with suppress_print(): ...``.
    """
    # Imported locally: no cell of this notebook imports ``os`` explicitly, so
    # relying on it leaking in through a ``from ... import *`` is fragile.
    import os

    # redirect_stdout swaps sys.stdout and puts the previous object back on
    # exit, replacing the hand-written save/try/finally/restore sequence.
    with open(os.devnull, 'w') as fnull, contextlib.redirect_stdout(fnull):
        yield

### 1. Generate synthetic data for regression

In [None]:
# Build the synthetic quadratic-regression dataset. The helper returns six
# values; judging by the names these are the full data (X_all, y_all), the
# pool used for active learning (X, y), and a held-out test split
# (X_test, y_test) — TODO confirm against generate_quadratic_regression.
X_all, y_all, X, y, X_test, y_test = generate_quadratic_regression()

### 2. CPAL (before-final-solve versus after-final-solve)

In [None]:
# 2. Run CPAL to actively learn on 15 data points
# dmat is the hyperplane-arrangement matrix built from X; it is passed to
# cutting_plane here and to convex_solve in the next cell.
dmat = generate_hyperplane_arrangement(X = X)
# 'r' presumably selects regression mode — verify in cutting_plane.
# c is the flat solution vector that the next cell reshapes into network
# weights; used is later passed on as the selection made by CPAL.
C, c, used = cutting_plane('r', X, y, dmat, n_points = 15)
print(f'used: {used}')

In [None]:
# dmat has one column per arrangement pattern (m); n_train is not used again
# in this notebook.
n_train, m = dmat.shape
d = X.shape[1]
# Unflatten the cutting-plane solution c into a (2*d, m) matrix, filling it
# column by column (Fortran order).
theta_matrix = np.reshape(c, (2*d,m), order ='F')
# The top d rows and the bottom d rows are the two weight blocks
# (before-final-solve weights).
Uopt1_v = theta_matrix[:d]
Uopt2_v = theta_matrix[d:]
# do the final convex solve
# The third return value of convex_solve is discarded.
Uopt1_final_v, Uopt2_final_v, _ = convex_solve(used, X, y, dmat)

In [None]:
# Pair up the two weight sets to plot: index 0 is the final-convex-solve
# solution, index 1 is the raw cutting-plane solution.
# NOTE(review): the title reads 'BFS vs AFS' while the lists are ordered
# after-final-solve first — confirm how visualize_quadratic_regression labels
# the entries.
Uopt1_list =  [Uopt1_final_v, Uopt1_v]
Uopt2_list =  [Uopt2_final_v, Uopt2_v]
visualize_quadratic_regression(Uopt1_list, Uopt2_list, X_all, X, y, X_test, y_test, used, alpha = 0.95, plot_band = False, title = 'CPAL on Quadratic Regression (BFS vs AFS)')

### 3. Linear Cutting-Plane Baseline

Since quadratic regression is nonlinear, the linear cutting-plane method eventually becomes infeasible as more points are queried. Here, we select only 4 points, stopping just before the solver breaks down due to infeasibility.

In [None]:
C, c, used = linear_cutting_plane_regression(X, y, n_points = 4)
print(f'used: {used}')

In [None]:
visualize_regression_linear(c, X_all, X, y, X_test, y_test, used, plot_band = False)

### 4. Skactive baseline

In [None]:
# First, process the data to be compatible with skactive.
# The targets (y_all, y, y_test) are not passed through this helper; only the
# three feature arrays are converted.
X_all_skactive, X_skactive, X_test_skactive = adapt_data_for_scikit_al(X_all, X, X_test)

In [None]:
# Query strategies to run through the skactive baseline; the names are passed
# verbatim to run_active_learning_strategies.
strategies = ['greedyX', 'greedyTarget', 'qbc', 'kldiv']
# results is later treated as a dict (see the .update calls in the following
# cells), presumably keyed by strategy name — confirm.
# NOTE(review): data_budget is 10 here while CPAL above uses n_points = 15 —
# confirm the differing budgets are intended for the final comparison.
results = run_active_learning_strategies(
    X_all=X_all_skactive,
    y_all=y_all,
    X=X_skactive,
    y=y,
    X_test=X_test_skactive,
    y_test=y_test,
    strategies=strategies,
    active_learning_fn=active_learning_skactiveml,  # your function
    data_budget=10,
    show_plots = False,
    save_plots = False
)

In [None]:
# add cpal results to the plots
cpal_dict = format_selected_data(X, y, used, 'cpal')
results.update(cpal_dict)

### 5. Run DeepAL baseline

In [None]:
# One entry per strategy, appended in the same order as deepal_strategies so
# the two lists can be zipped together afterwards.
deepal_selected_indices = []
deepal_strategies = ["RandomSampling", "LeastConfidence", "EntropySampling","KMeansSampling", "BALDDropout"]

# Everything the DeepAL runs print to stdout is silenced.
with suppress_print():
    for al in deepal_strategies:
        # run_active_learning takes a single args object; a SimpleNamespace
        # provides the attributes it reads.
        # Presumably 1 initial label + 14 rounds x 1 query = 15 labelled
        # points, matching CPAL's n_points = 15 — confirm in deepal_reg.
        args = SimpleNamespace(
            seed=1,
            n_init_labeled=1,
            n_query=1,
            n_round=14,
            dataset_name='Quadratic',
            strategy_name=al
        )

        # Only selected_indices is kept; the other return values are rebound
        # on every iteration and not used elsewhere in this notebook.
        selected_indices, rmse_train, rmse_test, strategy, dataset, net, device = run_active_learning(args)
        deepal_selected_indices.append(selected_indices)

### 6. Compare all active learning strategies

In [None]:
# update results dictionary
for al, ind in zip(deepal_strategies, deepal_selected_indices):
    al_dict = format_selected_data(X, y, ind, al)
    results.update(al_dict)

In [None]:
# Evaluate every strategy collected in results (skactive, CPAL, DeepAL)
# against the full dataset X_all / y_all. Going by the function name, a DNN is
# trained per strategy on its selected points — TODO confirm in
# src.baselines.evaluation.
evaluate_dnn_from_al_results_regression(results, X_all, y_all)