# Surrogate-Guided Refinement for Black-Box Optimization

This notebook demonstrates how to use Week 1 and Week 2 inputs/outputs to train surrogate models (SVM classifier + SVR regressor) and generate the next best candidate points for querying a black-box function.

In [1]:
# Import required libraries
import numpy as np
import pandas as pd
from sklearn.svm import SVC, SVR
from sklearn.preprocessing import StandardScaler

## Load Week 1 & Week 3 Data

In [2]:
week_inputs = {
    1: [[0.333333, 0.666667], [0.5, 0.5], [0.45, 0.55]],
    2: [[0.777778, 0.222222], [0.7, 0.3], [0.725, 0.275]],
    3: [[0.142857, 0.571429, 0.857143], [0.2, 0.6, 0.8], [0.8, 0.2, 0.4]],
    4: [[0.285714, 0.714286, 0.428571, 0.857143],
        [0.2, 0.8, 0.3, 0.7],
        [0.25, 0.75, 0.35, 0.65]],
    5: [[0.0625, 0.5, 0.9375, 0.25],
        [0.08, 0.52, 0.92, 0.27],
        [0.07, 0.51, 0.93, 0.26]],
    6: [[0.111111, 0.444444, 0.777778, 0.222222, 0.888889],
        [0.2, 0.5, 0.8, 0.3, 0.9],
        [0.21, 0.49, 0.81, 0.31, 0.91]],
    7: [[0.090909, 0.363636, 0.636364, 0.181818, 0.545455, 0.818182],
        [0.12, 0.38, 0.66, 0.22, 0.58, 0.84],
        [0.10, 0.36, 0.64, 0.20, 0.56, 0.82]],
    8: [[0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.0625],
        [0.15, 0.275, 0.4, 0.525, 0.65, 0.775, 0.9, 0.1],
        [0.13, 0.26, 0.38, 0.51, 0.63, 0.76, 0.88, 0.07]]
}

week_outputs = {
    1: [5.72e-48, 2.67e-9, 1.55e-13],
    2: [0.1668, 0.4380, 0.4116],
    3: [-0.0351, -0.0651, -0.0390],
    4: [-16.18, -15.30, -11.86],
    5: [94.62, 73.85, 85.55],
    6: [-1.77, -1.72, -1.82],
    7: [1.06, 0.838, 1.01],
    8: [8.67, 8.53, 8.63]
}

## Surrogate Training and Candidate Generation

In [3]:
best_inputs = {}

for f in range(1, 9):
    X_prior = np.array(week_inputs[f])
    y_prior = np.array(week_outputs[f])

    # Label outputs: high vs low
    threshold = np.median(y_prior)
    y_labels = (y_prior >= threshold).astype(int)

    # Scale inputs
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_prior)

    # Train surrogates
    svm_clf = SVC(kernel='rbf', probability=True, C=1.0)
    svm_clf.fit(X_scaled, y_labels)

    svr = SVR(kernel='rbf', C=1.0, epsilon=0.1)
    svr.fit(X_scaled, y_prior)

    # Candidate generation
    top_idx = np.argsort(y_prior)[-1]
    X_top = X_prior[top_idx].reshape(1, -1)

    perturb = np.clip(X_top + np.random.normal(0, 0.05, X_top.shape), 0, 1)
    sym_flip = 1 - X_top
    random_probe = np.random.rand(10, X_prior.shape[1])
    mid_range = np.full((1, X_prior.shape[1]), 0.5)

    X_candidates = np.vstack([perturb, sym_flip, random_probe, mid_range])

    # SVM filter (lower threshold)
    probs_high = svm_clf.predict_proba(scaler.transform(X_candidates))[:, 1]
    mask = probs_high > 0.4
    X_confirmed = X_candidates[mask]

    # SVR ranking
    if len(X_confirmed) > 0:
        pred_scores = svr.predict(scaler.transform(X_confirmed))
        rank = np.argsort(pred_scores)[::-1]
        top3 = X_confirmed[rank[:3]]
        best = X_confirmed[rank[0]]
    else:
        pred_scores = svr.predict(scaler.transform(X_candidates))
        rank = np.argsort(pred_scores)[::-1]
        top3 = X_candidates[rank[:3]]
        best = X_candidates[rank[0]]

    # Store results
    best_inputs[f] = {"top3": top3, "best": best}

## Display Results

In [4]:
for f, res in best_inputs.items():
    print(f"\nFunction {f} top 3 candidates:")
    print(pd.DataFrame(res["top3"], columns=[f"x{i+1}" for i in range(res["top3"].shape[1])]))
    print("\nBest candidate:")
    print(pd.DataFrame([res["best"]], columns=[f"x{i+1}" for i in range(res["best"].shape[0])]))


Function 1 top 3 candidates:
         x1        x2
0  0.566886  0.375565
1  0.097394  0.464807
2  0.929681  0.857731

Best candidate:
         x1        x2
0  0.566886  0.375565

Function 2 top 3 candidates:
         x1        x2
0  0.734836  0.272711
1  0.576304  0.355436
2  0.532720  0.429563

Best candidate:
         x1        x2
0  0.734836  0.272711

Function 3 top 3 candidates:
         x1        x2        x3
0  0.500000  0.500000  0.500000
1  0.475117  0.811818  0.350717
2  0.170658  0.833102  0.327947

Best candidate:
    x1   x2   x3
0  0.5  0.5  0.5

Function 4 top 3 candidates:
         x1        x2        x3        x4
0  0.269538  0.838922  0.299491  0.551417
1  0.315110  0.778444  0.009705  0.352063
2  0.628709  0.762601  0.318785  0.361546

Best candidate:
         x1        x2        x3        x4
0  0.269538  0.838922  0.299491  0.551417

Function 5 top 3 candidates:
         x1        x2        x3        x4
0  0.500000  0.500000  0.500000  0.500000
1  0.515002  0.84195