In [15]:
import numpy as np

# ============================================================
# Ridge regression to propose Week 5 queries (F2, F5, F7)
# ============================================================
# CONTEXT (BBO challenge)
# ----------------------
# You have a black-box function f(x) for each objective (F2/F5/F7).
# Each week you submit ONE x (a vector in [0,1]^d), and you get ONE output y=f(x).
# By Week 4 you have 4 samples per function: (x^(1), y^(1)), ..., (x^(4), y^(4)).
#
# Goal for Week 5:
#   Choose a new x^(5) that (hopefully) increases y.
#
# Idea used in this notebook:
#   1) Fit a *simple surrogate model* y_hat(x) ≈ b0 + b^T x using Ridge regression
#   2) Use its coefficients b as an estimate of the *local uphill direction*
#   3) Take a small step from the best-so-far point in that direction
#
# Why Ridge?
#   With only 4 data points, a plain linear regression can overfit and give unstable coefficients.
#   Ridge adds L2 regularisation to stabilise the coefficient estimates.
#
# Ridge objective (fit a linear model):
#   minimize_{b0,b}  Σ (y_i - (b0 + b^T x_i))^2  +  α ||b||^2
# where:
#   - b0 is intercept (not regularised)
#   - b are coefficients (regularised)
#   - α controls regularisation strength:
#       α small  -> more flexible (risk overfit)
#       α large  -> more stable but may underfit
# ============================================================


# Try scikit-learn Ridge; fallback to a closed-form ridge if sklearn isn't available.
# In Jupyter, you likely have sklearn installed, but this makes the notebook portable.
try:
    from sklearn.linear_model import Ridge
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False


# ----------------------------
# Your Weeks 1–4 data (inputs X, outputs y)
# ----------------------------
# Each function stores:
#   X: shape (n_samples=4, d)
#   y: shape (n_samples=4,)
#   step: the step size used to move from best x in the ridge "uphill" direction.
#
# Step sizes:
#   - F2 is sensitive -> smaller step
#   - F5 is smooth and strongly improving -> moderate step
#   - F7 is high-dimensional -> moderate but cautious step
# Observations keyed by function name. Each entry is a dict with:
#   "X"    -> one row per weekly query (rows are in week order, Week 1 first)
#   "y"    -> the observed output for the corresponding row of "X"
#   "step" -> step length applied along the unit ridge direction
DATA = {
    # F2: 2 input dimensions, 4 observations
    "F2": {
        "X": np.array([
            [0.755000, 0.275000],  # Week 1
            [0.785000, 0.305000],  # Week 2
            [0.740000, 0.260000],  # Week 3
            [0.730000, 0.270000],  # Week 4
        ], dtype=float),
        "y": np.array([
            0.42044085041824825,
            -0.0456643112924181,
            0.46274019045813003,
            0.6060955609811236,
        ], dtype=float),
        "step": 0.014,
    },

    # F5: 4 input dimensions, 4 observations
    "F5": {
        "X": np.array([
            [0.635000, 0.395000, 0.755000, 0.875000],  # Week 1
            [0.665000, 0.365000, 0.785000, 0.845000],  # Week 2
            [0.680000, 0.350000, 0.800000, 0.830000],  # Week 3
            [0.695000, 0.335000, 0.815000, 0.815000],  # Week 4
        ], dtype=float),
        "y": np.array([
            287.4343816627659,
            292.2593658119571,
            301.5311905557768,
            315.65049985154724,
        ], dtype=float),
        "step": 0.024,
    },

    # F7: 6 input dimensions, 4 observations
    "F7": {
        "X": np.array([
            [0.875000, 0.275000, 0.635000, 0.515000, 0.145000, 0.955000],  # Week 1
            [0.845000, 0.305000, 0.665000, 0.485000, 0.175000, 0.925000],  # Week 2
            [0.830000, 0.320000, 0.680000, 0.470000, 0.190000, 0.910000],  # Week 3
            [0.815000, 0.335000, 0.695000, 0.455000, 0.205000, 0.895000],  # Week 4
        ], dtype=float),
        "y": np.array([
            0.6267064847700778,
            0.8069621926499697,
            0.8919314248129555,
            0.969339703275594,
        ], dtype=float),
        "step": 0.024,
    },
}


# ----------------------------
# Helper functions
# ----------------------------
def format_query(x: np.ndarray) -> str:
    """
    Render a query vector as the portal string 'v1-v2-...'.

    Every component is written in fixed-point notation with exactly six
    decimal places, and the components are joined with hyphens.
    An empty vector yields an empty string.
    """
    pieces = [format(component, ".6f") for component in x]
    return "-".join(pieces)


def clip_01(x: np.ndarray) -> np.ndarray:
    """
    Clamp every component of x into the closed interval [0, 0.999999].

    The upper bound is 0.999999 rather than 1.0 so that, printed with six
    decimals, each value still starts with '0.' as the portal expects.
    The input is not modified; the clamped values are returned.
    """
    lower_bound = 0.0
    upper_bound = 0.999999
    return np.clip(x, lower_bound, upper_bound)


def fit_ridge_sklearn(X: np.ndarray, y: np.ndarray, alpha: float):
    """
    Fit the linear surrogate y_hat(x) = b0 + b^T x with scikit-learn's Ridge.

    The estimator is created with the given regularisation strength `alpha`
    and with an intercept term (fit_intercept=True), then fitted on (X, y).

    Returns:
        (b0, b) where
        b0: fitted intercept as a Python float
        b : fitted coefficient array, cast to float
    """
    ridge = Ridge(alpha=alpha, fit_intercept=True).fit(X, y)
    intercept = float(ridge.intercept_)
    coefficients = ridge.coef_.astype(float)
    return intercept, coefficients


def fit_ridge_closed_form(X: np.ndarray, y: np.ndarray, alpha: float):
    """
    Ridge regression via the normal equations (no sklearn needed).

    The intercept is handled by augmenting the inputs with a leading
    column of ones:
        Z = [1, X]

    and solving the linear system
        (Z^T Z + alpha * P) beta = Z^T y

    where P is the identity matrix with its top-left entry zeroed, so the
    intercept (first component of beta) is NOT penalised while every
    feature coefficient is.

    Returns:
        (b0, b) where
        b0: intercept as a Python float (beta[0])
        b : coefficient array (beta[1:]), cast to float
    """
    n_samples, n_features = X.shape

    # Design matrix with the constant column first.
    design = np.column_stack([np.ones(n_samples), X])

    # Diagonal 0/1 mask: 0 for the intercept slot, 1 for each feature.
    penalty_mask = np.diag(np.concatenate(([0.0], np.ones(n_features))))

    gram = design.T @ design
    rhs = design.T @ y
    solution = np.linalg.solve(gram + alpha * penalty_mask, rhs)

    return float(solution[0]), solution[1:].astype(float)


def propose_week5(X: np.ndarray, y: np.ndarray, alpha: float, step: float):
    """
    Core algorithm to propose the Week 5 query.

    Step 1) Fit a Ridge regression surrogate:
        y_hat(x) = b0 + b^T x
        (scikit-learn when SKLEARN_AVAILABLE is true, otherwise the
        closed-form solver.)

    Step 2) Pick the best observed point so far:
        x_best = the row of X whose observed y is largest

    Step 3) Move a small step in the direction that increases y_hat:
        For a linear model, the gradient wrt x is:
            ∇_x y_hat = b
        So an "uphill" move is along +b.

        b is normalised so that 'step' has a consistent meaning:
            direction = b / ||b||
            x_new = x_best + step * direction

    Why normalise?
        b's magnitude depends on scaling and regularisation.
        Normalising makes the step size comparable across functions.

    Safety:
        If ||b|| is ~0 (model essentially flat) or not finite (the fit
        produced NaN/inf coefficients), there is no reliable direction.
        Then direction is all zeros and x_new stays at x_best.
        The proposed point is finally passed through clip_01.

    Args:
        X: observed inputs, one row per sample.
        y: observed outputs, one value per row of X.
        alpha: ridge regularisation strength.
        step: length of the move along the unit direction.

    Returns:
        x_best, y_best, b0, b, direction, x_new
        x_best   : copy of the best observed input row
        y_best   : its observed output, as a float
        b0       : fitted intercept
        b        : fitted coefficient vector
        direction: unit vector b / ||b|| (zeros in the fallback case)
        x_new    : clipped proposal for the next query
    """
    if SKLEARN_AVAILABLE:
        b0, b = fit_ridge_sklearn(X, y, alpha=alpha)
    else:
        b0, b = fit_ridge_closed_form(X, y, alpha=alpha)

    best_idx = int(np.argmax(y))
    x_best = X[best_idx].copy()
    y_best = float(y[best_idx])

    norm = np.linalg.norm(b)
    # A NaN norm compares False against any threshold, so non-finite norms
    # must be tested explicitly or they would slip into the division below.
    if not np.isfinite(norm) or norm < 1e-12:
        # Model says: changing x doesn't affect y (or the fit is unusable)
        direction = np.zeros_like(b)
        x_new = x_best
    else:
        direction = b / norm
        x_new = x_best + step * direction

    x_new = clip_01(x_new)
    return x_best, y_best, b0, b, direction, x_new


# ----------------------------
# Run ridge + propose Week 5
# ----------------------------
def main():
    """
    Fit the ridge surrogate for every entry in DATA and print a report.

    For each function the report shows the best observed point, the fitted
    intercept and coefficients, the unit step direction, the step size,
    and the suggested Week 5 query in portal format.
    """
    # Regularisation strength shared by all functions.
    # 1e-2 is the default starting point for these tiny datasets; values
    # such as 1e-4, 1e-3, 1e-2, 1e-1 can be tried to check sensitivity.
    alpha = 1e-2

    print(f"Using sklearn: {SKLEARN_AVAILABLE}")
    print(f"Ridge alpha = {alpha}\n")

    for fname in DATA:
        entry = DATA[fname]
        step = entry["step"]

        proposal = propose_week5(entry["X"], entry["y"], alpha=alpha, step=step)
        x_best, y_best, b0, b, direction, x_new = proposal

        # Pre-render the arrays so the print statements stay short.
        coeff_text = np.array2string(b, precision=6, suppress_small=False)
        direction_text = np.array2string(direction, precision=6, suppress_small=False)

        print(f"{fname}")
        print(f"  Best-so-far x : {format_query(x_best)}   y: {y_best:.6f}")
        print(f"  Ridge intercept b0: {b0:.6f}")
        print(f"  Ridge coeffs b (≈ local gradient direction): {coeff_text}")
        print(f"  Unit direction b/||b||: {direction_text}")
        print(f"  Step size: {step}")
        print(f"  Week 5 x (suggested): {format_query(x_new)}")
        print()

# A Jupyter cell executes with __name__ == "__main__", so this guard still
# runs main() in a notebook. Copied into a .py file, it runs main() when the
# file is executed as a script but not when it is imported as a module.
if __name__ == "__main__":
    main()


Using sklearn: True
Ridge alpha = 0.01

F2
  Best-so-far x : 0.730000-0.270000   y: 0.606096
  Ridge intercept b0: 1.862963
  Ridge coeffs b (≈ local gradient direction): [-1.566729 -1.164311]
  Unit direction b/||b||: [-0.802632 -0.596474]
  Step size: 0.014
  Week 5 x (suggested): 0.718763-0.261649

F5
  Best-so-far x : 0.695000-0.335000-0.815000-0.815000   y: 315.650500
  Ridge intercept b0: 286.648344
  Ridge coeffs b (≈ local gradient direction): [ 49.296141 -49.296141  49.296141 -49.296141]
  Unit direction b/||b||: [ 0.5 -0.5  0.5 -0.5]
  Step size: 0.024
  Week 5 x (suggested): 0.707000-0.323000-0.827000-0.803000

F7
  Best-so-far x : 0.815000-0.335000-0.695000-0.455000-0.205000-0.895000   y: 0.969340
  Ridge intercept b0: 1.387213
  Ridge coeffs b (≈ local gradient direction): [-0.518141  0.518141  0.518141 -0.518141  0.518141 -0.518141]
  Unit direction b/||b||: [-0.408248  0.408248  0.408248 -0.408248  0.408248 -0.408248]
  Step size: 0.024
  Week 5 x (suggested): 0.805202-0