# Surrogate-Guided Refinement for Black-Box Optimization

This notebook demonstrates how to use Week 1 and Week 2 inputs/outputs to train surrogate models (SVM classifier + SVR regressor) and generate the next best candidate points for querying a black-box function.

In [6]:
# Import required libraries
import numpy as np
import pandas as pd
from sklearn.svm import SVC, SVR
from sklearn.preprocessing import StandardScaler

## Load Week 1 & Week 2 Data

In [10]:
# Week 1 inputs and outputs
week1_inputs = {
    1: [[0.333333, 0.666667]],
    2: [[0.777778, 0.222222]],
    3: [[0.142857, 0.571429, 0.857143]],
    4: [[0.285714, 0.714286, 0.428571, 0.857143]],
    5: [[0.0625, 0.5, 0.9375, 0.25]],
    6: [[0.111111, 0.444444, 0.777778, 0.222222, 0.888889]],
    7: [[0.090909, 0.363636, 0.636364, 0.181818, 0.545455, 0.818182]],
    8: [[0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 0.0625]]
}
week1_outputs = {
    1: [5.72e-48], 2: [0.1668], 3: [-0.0351], 4: [-16.18],
    5: [94.62], 6: [-1.77], 7: [1.06], 8: [8.67]
}

# Week 2 inputs and outputs
week2_inputs = {
    1: [[0.5, 0.5]],
    2: [[0.7, 0.3]],
    3: [[0.2, 0.6, 0.8]],
    4: [[0.2, 0.8, 0.3, 0.7]],
    5: [[0.08, 0.52, 0.92, 0.27]],
    6: [[0.2, 0.5, 0.8, 0.3, 0.9]],
    7: [[0.12, 0.38, 0.66, 0.22, 0.58, 0.84]],
    8: [[0.15, 0.275, 0.4, 0.525, 0.65, 0.775, 0.9, 0.1]]
}
week2_outputs = {
    1: [2.67e-9], 2: [0.438], 3: [-0.065], 4: [-15.30],
    5: [73.85], 6: [-1.72], 7: [0.838], 8: [8.53]
}

## Surrogate Training and Candidate Generation

In [11]:
best_inputs = {}

for f in range(1, 9):
    # Combine Week 1 & 2 data
    X_prior = np.vstack([week1_inputs[f], week2_inputs[f]])
    y_prior = np.concatenate([week1_outputs[f], week2_outputs[f]])

    # Label outputs: high vs low
    threshold = np.median(y_prior)
    y_labels = (y_prior >= threshold).astype(int)

    # Scale inputs
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_prior)

    # Train surrogates
    svm_clf = SVC(kernel='rbf', probability=True, C=1.0)
    svm_clf.fit(X_scaled, y_labels)

    svr = SVR(kernel='rbf', C=1.0, epsilon=0.1)
    svr.fit(X_scaled, y_prior)

    # Candidate generation
    top_idx = np.argsort(y_prior)[-1]
    X_top = X_prior[top_idx].reshape(1, -1)

    perturb = np.clip(X_top + np.random.normal(0, 0.05, X_top.shape), 0, 1)
    sym_flip = 1 - X_top
    random_probe = np.random.rand(10, X_prior.shape[1])
    mid_range = np.full((1, X_prior.shape[1]), 0.5)

    X_candidates = np.vstack([perturb, sym_flip, random_probe, mid_range])

    # SVM filter (lower threshold)
    probs_high = svm_clf.predict_proba(scaler.transform(X_candidates))[:, 1]
    mask = probs_high > 0.4
    X_confirmed = X_candidates[mask]

    # SVR ranking
    if len(X_confirmed) > 0:
        pred_scores = svr.predict(scaler.transform(X_confirmed))
        rank = np.argsort(pred_scores)[::-1]
        top3 = X_confirmed[rank[:3]]
        best = X_confirmed[rank[0]]
    else:
        pred_scores = svr.predict(scaler.transform(X_candidates))
        rank = np.argsort(pred_scores)[::-1]
        top3 = X_candidates[rank[:3]]
        best = X_candidates[rank[0]]

    # Store results
    best_inputs[f] = {"top3": top3, "best": best}

## Display Results

In [12]:
for f, res in best_inputs.items():
    print(f"\nFunction {f} top 3 candidates:")
    print(pd.DataFrame(res["top3"], columns=[f"x{i+1}" for i in range(res["top3"].shape[1])]))
    print("\nBest candidate:")
    print(pd.DataFrame([res["best"]], columns=[f"x{i+1}" for i in range(res["best"].shape[0])]))


Function 1 top 3 candidates:
         x1        x2
0  0.724402  0.063698
1  0.957865  0.611343
2  0.932474  0.707054

Best candidate:
         x1        x2
0  0.724402  0.063698

Function 2 top 3 candidates:
         x1        x2
0  0.650806  0.263343
1  0.780393  0.521296
2  0.500000  0.500000

Best candidate:
         x1        x2
0  0.650806  0.263343

Function 3 top 3 candidates:
         x1        x2        x3
0  0.500000  0.500000  0.500000
1  0.997987  0.441381  0.965249
2  0.472386  0.263174  0.367130

Best candidate:
    x1   x2   x3
0  0.5  0.5  0.5

Function 4 top 3 candidates:
         x1        x2        x3        x4
0  0.245070  0.735037  0.100702  0.719850
1  0.307608  0.771506  0.610303  0.066707
2  0.969684  0.358371  0.237818  0.902673

Best candidate:
        x1        x2        x3       x4
0  0.24507  0.735037  0.100702  0.71985

Function 5 top 3 candidates:
         x1        x2        x3        x4
0  0.500000  0.500000  0.500000  0.500000
1  0.074302  0.890333  0