## Setup

In [1]:
import os

from numpy import ndarray
from sklearn.gaussian_process.kernels import Matern
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats._qmc import Sobol
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel
from scipy.stats import norm
from scipy.optimize import minimize
import warnings
warnings.filterwarnings('ignore')

# Global seed for every stochastic step (a nod to The Hitchhiker's Guide to the Galaxy).
random_seed = 42
np.random.seed(random_seed)

# ---------------------------------------------------------------
# UCB exploration weight (kappa) - rule of thumb:
#   0.1 - 1  : exploitation
#   2   - 5  : balanced
#   5   - 10 : exploratory
# ---------------------------------------------------------------
KAPPA_BALANCED = 5
KAPPA_EXPLOIT = 0.1
KAPPA_EXPLORE = 10
KAPPA_EXTREME_EXPLORE = 20

# The position in this list determines the numeric label a proposal gets
# when the results are collected, so keep the order stable.
KAPPAS = [
    KAPPA_EXPLOIT,
    KAPPA_BALANCED,
    KAPPA_EXPLORE,
    KAPPA_EXTREME_EXPLORE,
]

# ---------------------------------------------------------------
# EI exploration offset (xi) - rule of thumb:
#   0          : exploitation
#   0.01 - 0.1 : small exploration
#   0.1  - 0.5 : moderate exploration
#   0.5+       : higher exploration
# ---------------------------------------------------------------
XI_EXPLOIT = 0
XI_SMALL = 0.1
XI_MODERATE = 0.5
XI_HIGH = 20

# Same ordering caveat as KAPPAS.
XIS = [
    XI_EXPLOIT,
    XI_SMALL,
    XI_MODERATE,
    XI_HIGH,
]

# Names used to select the acquisition function.
ACQUISTION_EI = 'ei'
ACQUISTION_UCB = 'ucb'



In [2]:
def upper_confidence_bound(mu, sigma, kappa):
    """
    Score candidates with the Upper Confidence Bound (UCB) rule.

    The score is the predicted mean plus ``kappa`` predicted standard
    deviations:  UCB = mu + kappa * sigma

    Parameters:
    -----------
    mu : predicted mean
    sigma : predicted standard deviation
    kappa : exploration weight (larger values reward uncertainty more)

    Returns:
    --------
    The UCB score, with whatever shape ``mu + kappa * sigma`` broadcasts to.
    """
    exploration_bonus = kappa * sigma
    return mu + exploration_bonus

#
def expected_improvement(mu, sigma, y_best, xi):
    """
    Expected Improvement (EI) acquisition function (maximisation form).

    EI = E[max(f(x) - f(x_best) - xi, 0)]
       = (mu - y_best - xi) * Phi(Z) + sigma * phi(Z),   Z = (mu - y_best - xi) / sigma

    Points whose predicted standard deviation is exactly zero get EI = 0.

    Parameters:
    -----------
    mu : predicted mean (scalar or array)
    sigma : predicted standard deviation (scalar or array, broadcastable with mu)
    y_best : best observed value so far
    xi : exploration parameter, subtracted from the improvement
         (same units as the objective values)

    Returns:
    --------
    numpy.ndarray of EI values with the broadcast shape of the inputs
    (a 0-d array when every input is a scalar).
    """
    mu = np.asarray(mu, dtype=float)
    sigma = np.asarray(sigma, dtype=float)

    improvement = mu - y_best - xi
    shape = np.broadcast(improvement, sigma).shape

    # Only divide where sigma is non-zero: this avoids the inf/NaN (and the
    # RuntimeWarnings) that a plain division produces for zero-variance points.
    has_spread = np.broadcast_to(sigma != 0.0, shape)
    Z = np.divide(improvement, sigma, out=np.zeros(shape), where=has_spread)

    ei = improvement * norm.cdf(Z) + sigma * norm.pdf(Z)

    # Zero predictive uncertainty -> no expected improvement.
    return np.where(has_spread, ei, 0.0)


In [3]:

def fmt(v):
    """Round ``v`` to 8 decimal places by formatting it as fixed-point text and parsing it back to a float."""
    fixed_point_text = format(v, ".8f")
    return float(fixed_point_text)


def summary_stats(x):
    """
    Describe the distribution of ``x`` as a dict of rounded statistics.

    Keys, in order: min, max, mean, std, q1, median, q3. Every value is
    passed through ``fmt`` (8 decimal places).
    """
    reducers = (
        ("min", np.min),
        ("max", np.max),
        ("mean", np.mean),
        ("std", np.std),
    )
    percentile_ranks = (
        ("q1", 25),
        ("median", 50),
        ("q3", 75),
    )

    stats = {label: fmt(reduce_fn(x)) for label, reduce_fn in reducers}
    for label, rank in percentile_ranks:
        stats[label] = fmt(np.percentile(x, rank))
    return stats

def bayesian_optimization_next_point(
        n_dims,
        X_samples,
        y_samples,
        acquisition,
        kappa = KAPPA_BALANCED,
        xi = XI_SMALL,
        n_sobol=None,
        n_local=None):

    """
    Propose the next point to evaluate by maximising an acquisition function
    over the unit hypercube [0, 1]^n_dims.

    Steps: fit a Gaussian Process to the samples, score a large Sobol
    candidate set with the acquisition function, then refine the best
    candidates with bounded L-BFGS-B local searches.

    Parameters
    ----------
    n_dims : int
        Number of dimensions
    X_samples : array-like (N, D)
        Existing sample inputs
    y_samples : array-like (N,)
        Existing sample outputs (the objective is maximised)
    acquisition : str
        'ei' or 'ucb'
    kappa : float
        UCB exploration parameter (higher = more exploration); only used for 'ucb'
    xi : float
        EI exploration parameter (higher = more exploration); only used for 'ei'
    n_sobol : int
        Number of Sobol candidate points (default 16384 * n_dims)
    n_local : int
        Number of local optimizations (default 16 * n_dims)

    Returns
    -------
    best_x : ndarray (D,) or None
        Location of the highest acquisition value found. None only when
        there are no start points at all (n_local == 0).
    best_val : float
        Acquisition value at ``best_x``.

    Raises
    ------
    ValueError
        If ``acquisition`` is not one of the supported names.

    Notes
    -----
    When the acquisition surface is constant over the candidate set (for
    example EI that is zero everywhere), the candidate ranking carries no
    information and the proposed point is essentially arbitrary.
    """

    # Fail loudly instead of silently falling back to UCB on a typo.
    if acquisition not in (ACQUISTION_EI, ACQUISTION_UCB):
        raise ValueError(
            f"Unknown acquisition {acquisition!r}; expected "
            f"{ACQUISTION_EI!r} or {ACQUISTION_UCB!r}"
        )

    # Accept lists as well as arrays.
    X_samples = np.asarray(X_samples, dtype=float)
    y_samples = np.asarray(y_samples, dtype=float)

    if n_sobol is None:
        n_sobol = 16384 * n_dims
    if n_local is None:
        n_local = 16 * n_dims

    bounds = [(0.0, 1.0) for _ in range(n_dims)]


    if acquisition == ACQUISTION_EI:
        print(f"Using Expected Improvement Acquisition function with xi = {xi}")
    else:
        print(f"Using UCB Acquisition function with Kappa = {kappa}")


    # -----------------------------
    # 1. Fit Gaussian Process
    #
    #  alpha is the noise level
    #  n_restarts_optimizer is a hyper-parameter
    #  normalize_y improves stability
    # -----------------------------
    kernel = ConstantKernel(1.0) * Matern(nu=2.5)
    gp = GaussianProcessRegressor(
        kernel=kernel,
        alpha=1e-6,
        n_restarts_optimizer=5,
        normalize_y=True
    )
    gp.fit(X_samples, y_samples)

    # Incumbent for EI; constant for the whole call, so compute it once.
    y_best = y_samples.max()

    # -----------------------------
    # 2. Acquisition function
    # -----------------------------
    def acq(X):
        X = np.atleast_2d(X)
        mu, sigma = gp.predict(X, return_std=True)

        if acquisition == ACQUISTION_EI:
            val = expected_improvement(mu, sigma, y_best, xi)
        else:
            val = upper_confidence_bound(mu, sigma, kappa)

        return val

    # -----------------------------
    # 3. Sobol global search
    #
    # Covers the space more evenly than uniform random sampling. All
    # candidates are scored in one vectorised call and the top n_local are
    # kept as starting points for the local optimisation below.
    # -----------------------------
    sobol = Sobol(d=n_dims, scramble=True, seed=random_seed)

    X_candidates = sobol.random(n_sobol)
    acq_vals = acq(X_candidates)

    # Pick best Sobol points (descending acquisition value)
    best_idx = np.argsort(-acq_vals)[:n_local]
    X_start = X_candidates[best_idx]

    print(f"\t{len(X_candidates)} Sobol candidates, best {len(X_start)} to be minimised in local search")
    print(f"\tInitial acquisition function stats {summary_stats(acq_vals)}")

    # -----------------------------
    # 4. Local optimization
    # -----------------------------
    def objective(x):
        # minimize negative acquisition; return a plain float because the
        # optimiser expects a scalar objective, not a 1-element array
        return -float(acq(x)[0])

    best_x = None
    best_val = -np.inf

    for i, x0 in enumerate(X_start):
        result = minimize(
            objective,
            x0,
            bounds=bounds,
            method="L-BFGS-B"
        )

        val = -result.fun
        # Progress is only reported when a local search improves on the best so far.
        if val > best_val:
            best_val = val
            best_x = result.x
            print(f"\tLocal search {i+1}/{n_local}, x={result.x}, val={val}, best={best_val:.10f}")

    # If no local search produced a comparable value (e.g. every result was
    # NaN), fall back to the top-ranked Sobol candidate instead of returning
    # None, which callers cannot format.
    if best_x is None and len(X_start) > 0:
        best_x = X_start[0].copy()
        best_val = float(acq_vals[best_idx[0]])
        print("\tNo local search improved on the Sobol candidates; using the best Sobol point")

    return best_x, best_val



In [4]:
# Proposed next points from every run, keyed by function name (filled in by findNextGuess)
runResults = dict()



In [5]:
def lexiSort(arr, col=-1):
    """
    Return the rows of a 2-D array sorted in descending order of one column.

    Parameters:
    -----------
    arr : 2-D array-like whose rows are to be reordered
    col : index of the column to sort on. Defaults to the last column, which
          is the output column when inputs and outputs are stacked side by
          side. (A fixed index of 2 is only the output column for 2-D inputs.)

    Returns:
    --------
    A new array with the same rows as ``arr``, largest ``col`` value first.
    """
    arr = np.asarray(arr)
    descending_order = np.argsort(arr[:, col])[::-1]
    return arr[descending_order]

def formatOutputForSubmission(nextInputToTest: ndarray) -> str:
    """
    Format a point as the hyphen-separated submission string, e.g.
    ``0.431029-0.814368``.

    Each coordinate is written in fixed-point notation with 6 decimals. The
    string is built explicitly rather than with ``np.array2string`` so that
    long vectors are never line-wrapped and no padding spaces are inserted
    when coordinates have different widths.

    Parameters:
    -----------
    nextInputToTest : array-like of coordinates (flattened before formatting)

    Returns:
    --------
    str with one ``%.6f`` field per coordinate, joined by '-'.
    """
    coordinates = np.asarray(nextInputToTest, dtype=float).ravel()
    return "-".join(f"{value:.6f}" for value in coordinates)



def findNextGuess(functionName, f_Inputs, f_Outputs, showKnown=False):
    """
    Print a summary of the known samples for one function, then run the
    Bayesian optimiser once per EI ``xi`` setting and once per UCB ``kappa``
    setting, printing each proposed point.

    The proposals are stored in ``runResults[functionName]`` as a dict that
    maps the run position (EI runs first, in ``XIS`` order, then UCB runs in
    ``KAPPAS`` order) to the hyphen-separated submission string.

    Parameters:
    -----------
    functionName : label used in the printout and as the ``runResults`` key
    f_Inputs : known sample inputs, one row per sample
    f_Outputs : known sample outputs
    showKnown : when True, also print the known samples table
    """
    # Per-dimension input range and overall output range
    upper_inputs = np.max(f_Inputs, axis=0)
    lower_inputs = np.min(f_Inputs, axis=0)
    best_output = np.max(f_Outputs)
    worst_output = np.min(f_Outputs)
    numDimensions = len(upper_inputs)

    known_table = lexiSort(np.column_stack((f_Inputs, f_Outputs)))

    banner = '********************'
    print(banner)
    print(functionName, ' - ', numDimensions, 'D')
    print(banner)
    print(' Inputs -   Min:',  lower_inputs, ', Max: ', upper_inputs)
    print(' Output -   Min:',  worst_output, ', Max (Best): ', best_output)

    if showKnown:
        print()
        print(np.array2string(known_table, floatmode='fixed', precision=6))
        print()

    print(f"Starting {numDimensions}D Bayes Optimisation from {len(f_Inputs)} known samples")
    print()

    # One entry per optimiser run: the acquisition name plus its tuning keyword.
    runs = [(ACQUISTION_EI, {'xi': xi}) for xi in XIS]
    runs.extend((ACQUISTION_UCB, {'kappa': k}) for k in KAPPAS)

    proposals = {}
    for slot, (acquisition_name, tuning) in enumerate(runs):
        candidate, _acq_value = bayesian_optimization_next_point(
            n_dims=numDimensions,
            X_samples=f_Inputs,
            y_samples=f_Outputs,
            acquisition=acquisition_name,
            **tuning
        )

        proposals[slot] = formatOutputForSubmission(candidate)
        print()
        print(f'\tProposed next point to try: {proposals[slot]}')
        print()

    runResults[functionName] = proposals


In [6]:
def collatePreviousSubmissions(functionNo):
    """
    Load every submission and result recorded for one function from the most
    recent ``submission_results/week<N>`` folder.

    ``inputs.txt`` and ``outputs.txt`` in that folder hold one Python literal
    per non-blank line; each literal is indexed by ``functionNo - 1`` to pick
    out this function's entry.

    Parameters:
    -----------
    functionNo : 1-based function number (1..8)

    Returns:
    --------
    (functionSubmissions, functionResults) : two numpy arrays with one entry
    per line of ``inputs.txt`` / ``outputs.txt`` respectively.

    Raises:
    -------
    AssertionError : if functionNo is outside 1..8
    FileNotFoundError : if no ``week<N>`` folder exists
    """
    functionIndex = functionNo - 1
    assert 0 <= functionIndex <= 7, "functionNo must be between 1 and 8"

    results_root = "submission_results"
    prefix = "week"

    # Search for subfolders and find the last submission week. Only accept
    # real directories named week<digits>, so stray entries such as
    # "week_notes.txt" do not break the numeric sort.
    week_folders = [
        d for d in os.listdir(results_root)
        if d.startswith(prefix)
        and d[len(prefix):].isdecimal()
        and os.path.isdir(os.path.join(results_root, d))
    ]
    if not week_folders:
        raise FileNotFoundError(
            f"No '{prefix}<N>' folders found in '{results_root}'"
        )
    week_folders.sort(key=lambda w: int(w[len(prefix):]))
    last_week = week_folders[-1]
    week_dir = os.path.join(results_root, last_week)


    def load_file(file_name):
        """Parse each non-blank line of ``week_dir/file_name`` into a Python object."""
        file_path = os.path.join(week_dir, file_name)
        all_lines = []
        with open(file_path, "r") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                # SECURITY: eval executes arbitrary code. This is only
                # acceptable because these files are trusted, locally written
                # Python literals; never point this at untrusted data.
                parsedData = eval(line, {"array": np.array, "np": np})
                all_lines.append(parsedData)
        return all_lines
    allSubmissions = load_file("inputs.txt")
    allResults = load_file("outputs.txt")

    # Extract the functionNo inputs and outputs only and return those arrays only
    functionSubmissions = np.array([line[functionIndex] for line in allSubmissions])
    functionResults = np.array([line[functionIndex] for line in allResults])

    return functionSubmissions, functionResults


In [7]:
def combineInitialDataAndSubmissionsToDate(functionNo):
    """
    Build the full sample set for one function: the initial data loaded from
    ``initial_data/function_<N>/`` followed by every submission collected by
    ``collatePreviousSubmissions``.

    Parameters:
    -----------
    functionNo : 1-based function number (1..8)

    Returns:
    --------
    (combined_Inputs, combined_Outputs) : inputs stacked row-wise and outputs
    concatenated, initial data first.
    """
    assert 1 <= functionNo

    assert functionNo <= 8

    seed_inputs = np.load(f'initial_data/function_{functionNo}/initial_inputs.npy')
    seed_outputs = np.load(f'initial_data/function_{functionNo}/initial_outputs.npy')

    submitted_inputs, submitted_outputs = collatePreviousSubmissions(functionNo)

    combined_Inputs = np.vstack([seed_inputs, submitted_inputs])
    combined_Outputs = np.concatenate([seed_outputs, submitted_outputs])

    # Sanity checks: dimensionality is unchanged and there is one output per input row
    n_rows, n_cols = combined_Inputs.shape[0], combined_Inputs.shape[1]
    assert n_cols == seed_inputs.shape[1]
    assert combined_Outputs.shape[0] == n_rows

    return combined_Inputs, combined_Outputs


In [8]:
# F1: merge the initial samples with the submissions to date, then propose next points
f1_Inputs, f1_Outputs = combineInitialDataAndSubmissionsToDate(functionNo=1)
findNextGuess(functionName='F1', f_Inputs=f1_Inputs, f_Outputs=f1_Outputs)

********************
F1  -  2 D
********************
 Inputs -   Min: [0.029245 0.037348] , Max:  [0.88388983 0.8798981 ]
 Output -   Min: -0.0036060626443634764 , Max (Best):  7.710875114502849e-16
Starting 2D Bayes Optimisation from 12 known samples

Using Expected Improvement Acquisition function with xi = 0
	32768 Sobol candidates, best 32 to be minimised in local search
	Initial acquisition function stats {'min': 0.0002653, 'max': 0.0002653, 'mean': 0.0002653, 'std': 0.0, 'q1': 0.0002653, 'median': 0.0002653, 'q3': 0.0002653}
	Local search 1/32, x=[0.43102947 0.81436797], val=0.00026529578026409365, best=0.0002652958

	Proposed next point to try: 0.431029-0.814368

Using Expected Improvement Acquisition function with xi = 0.1
	32768 Sobol candidates, best 32 to be minimised in local search
	Initial acquisition function stats {'min': 0.0, 'max': 0.0, 'mean': 0.0, 'std': 0.0, 'q1': 0.0, 'median': 0.0, 'q3': 0.0}
	Local search 1/32, x=[0.43102947 0.81436797], val=0.0, best=0.00000000

In [9]:
# F2: merge the initial samples with the submissions to date, then propose next points
f2_Inputs, f2_Outputs = combineInitialDataAndSubmissionsToDate(functionNo=2)
findNextGuess(functionName='F2', f_Inputs=f2_Inputs, f_Outputs=f2_Outputs)

********************
F2  -  2 D
********************
 Inputs -   Min: [0.14269907 0.02869772] , Max:  [0.87779099 0.970764  ]
 Output -   Min: -0.06562362443733738 , Max (Best):  0.6112052157614438
Starting 2D Bayes Optimisation from 12 known samples

Using Expected Improvement Acquisition function with xi = 0
	32768 Sobol candidates, best 32 to be minimised in local search
	Initial acquisition function stats {'min': 0.0, 'max': 0.02342799, 'mean': 0.00238995, 'std': 0.00163815, 'q1': 0.00221568, 'median': 0.00236789, 'q3': 0.00238177}
	Local search 1/32, x=[0.68503415 0.91711803], val=0.023447337739884097, best=0.0234473377
	Local search 11/32, x=[0.68503407 0.91711816], val=0.023447337739886918, best=0.0234473377
	Local search 16/32, x=[0.68503411 0.91711812], val=0.02344733773990687, best=0.0234473377

	Proposed next point to try: 0.685034-0.917118

Using Expected Improvement Acquisition function with xi = 0.1
	32768 Sobol candidates, best 32 to be minimised in local search
	Initial

In [10]:
# F3: merge the initial samples with the submissions to date, then propose next points
f3_Inputs, f3_Outputs = combineInitialDataAndSubmissionsToDate(functionNo=3)
findNextGuess(functionName='F3', f_Inputs=f3_Inputs, f_Outputs=f3_Outputs)

********************
F3  -  3 D
********************
 Inputs -   Min: [0.04680895 0.028783   0.06608864] , Max:  [0.96599485 0.94135983 0.99088187]
 Output -   Min: -0.3989255131463011 , Max (Best):  -0.016667499870921285
Starting 3D Bayes Optimisation from 17 known samples

Using Expected Improvement Acquisition function with xi = 0
	49152 Sobol candidates, best 48 to be minimised in local search
	Initial acquisition function stats {'min': 0.00619313, 'max': 0.00619313, 'mean': 0.00619313, 'std': 0.0, 'q1': 0.00619313, 'median': 0.00619313, 'q3': 0.00619313}
	Local search 1/48, x=[0.43102947 0.81436797 0.80641277], val=0.006193125042959033, best=0.0061931250

	Proposed next point to try: 0.431029-0.814368-0.806413

Using Expected Improvement Acquisition function with xi = 0.1
	49152 Sobol candidates, best 48 to be minimised in local search
	Initial acquisition function stats {'min': 0.0, 'max': 0.0009493, 'mean': 0.00022417, 'std': 0.0002383, 'q1': 8.61e-06, 'median': 0.00013356, 'q3'

In [11]:
# F4: merge the initial samples with the submissions to date, then propose next points
f4_Inputs, f4_Outputs = combineInitialDataAndSubmissionsToDate(functionNo=4)
findNextGuess(functionName='F4', f_Inputs=f4_Inputs, f_Outputs=f4_Outputs)

********************
F4  -  4 D
********************
 Inputs -   Min: [0.03782483 0.0062504  0.04218635 0.08151656] , Max:  [0.98562189 0.91959232 0.93917791 0.99948256]
 Output -   Min: -32.625660215962455 , Max (Best):  0.5348195285562913
Starting 4D Bayes Optimisation from 32 known samples

Using Expected Improvement Acquisition function with xi = 0
	65536 Sobol candidates, best 64 to be minimised in local search
	Initial acquisition function stats {'min': 0.0, 'max': 0.07866262, 'mean': 2.854e-05, 'std': 0.00081092, 'q1': 0.0, 'median': 0.0, 'q3': 0.0}
	Local search 1/64, x=[0.40760214 0.41149216 0.3502027  0.43796986], val=0.09500983507370848, best=0.0950098351
	Local search 2/64, x=[0.40760274 0.41149197 0.35020207 0.43796948], val=0.09500983507663785, best=0.0950098351
	Local search 41/64, x=[0.40760289 0.41149192 0.3502018  0.43797002], val=0.09500983508657967, best=0.0950098351

	Proposed next point to try: 0.407603-0.411492-0.350202-0.437970

Using Expected Improvement Acquis

In [12]:
# F5: merge the initial samples with the submissions to date, then propose next points
f5_Inputs, f5_Outputs = combineInitialDataAndSubmissionsToDate(functionNo=5)
findNextGuess(functionName='F5', f_Inputs=f5_Inputs, f_Outputs=f5_Outputs)

********************
F5  -  4 D
********************
 Inputs -   Min: [0.11987923 0.03819337 0.08894684 0.07288048] , Max:  [0.924901   0.86254031 0.979432   0.9576439 ]
 Output -   Min: 0.1129397953712203 , Max (Best):  1959.0960427268283
Starting 4D Bayes Optimisation from 22 known samples

Using Expected Improvement Acquisition function with xi = 0
	65536 Sobol candidates, best 64 to be minimised in local search
	Initial acquisition function stats {'min': 0.0, 'max': 22.06828864, 'mean': 0.00654793, 'std': 0.21381345, 'q1': 5.884e-05, 'median': 0.00031684, 'q3': 0.00066874}
	Local search 1/64, x=[0.32362818 0.83906804 1.         0.93323766], val=36.24416960176854, best=36.2441696018
	Local search 12/64, x=[0.32362817 0.83906803 1.         0.93323767], val=36.244169601768974, best=36.2441696018
	Local search 22/64, x=[0.32362817 0.83906803 1.         0.93323767], val=36.2441696017692, best=36.2441696018
	Local search 44/64, x=[0.32362817 0.83906804 1.         0.93323767], val=36.2441

In [13]:
# F6: merge the initial samples with the submissions to date, then propose next points
f6_Inputs, f6_Outputs = combineInitialDataAndSubmissionsToDate(functionNo=6)
findNextGuess(functionName='F6', f_Inputs=f6_Inputs, f_Outputs=f6_Outputs)


********************
F6  -  5 D
********************
 Inputs -   Min: [0.02173531 0.11440374 0.0165229  0.04561319 0.003667  ] , Max:  [0.95773967 0.93187122 0.989507   0.96165559 0.89281919]
 Output -   Min: -2.5711696316081234 , Max (Best):  -0.2776577094327388
Starting 5D Bayes Optimisation from 22 known samples

Using Expected Improvement Acquisition function with xi = 0
	81920 Sobol candidates, best 80 to be minimised in local search
	Initial acquisition function stats {'min': 0.0, 'max': 0.09511297, 'mean': 0.00108659, 'std': 0.00574477, 'q1': 0.0, 'median': 1e-08, 'q3': 9.02e-06}
	Local search 1/80, x=[0.1939142  0.25207574 0.3907333  0.98175681 0.        ], val=0.10422708917086575, best=0.1042270892
	Local search 8/80, x=[0.19391596 0.25207747 0.39073282 0.98175488 0.        ], val=0.1042270891740603, best=0.1042270892
	Local search 10/80, x=[0.19391491 0.25207912 0.39073049 0.98175654 0.        ], val=0.10422708917811488, best=0.1042270892
	Local search 12/80, x=[0.19391467 0.

In [14]:
# F7: merge the initial samples with the submissions to date, then propose next points
f7_Inputs, f7_Outputs = combineInitialDataAndSubmissionsToDate(functionNo=7)
findNextGuess(functionName='F7', f_Inputs=f7_Inputs, f_Outputs=f7_Outputs)

********************
F7  -  6 D
********************
 Inputs -   Min: [0.011684   0.01181284 0.00363456 0.07365919 0.01494418 0.05109986] , Max:  [0.94245084 0.9246939  0.92457051 0.96101714 0.9986547  0.95101392]
 Output -   Min: 0.0027014650245082332 , Max (Best):  1.627092775388822
Starting 6D Bayes Optimisation from 32 known samples

Using Expected Improvement Acquisition function with xi = 0
	98304 Sobol candidates, best 96 to be minimised in local search
	Initial acquisition function stats {'min': 0.0, 'max': 0.04037694, 'mean': 1.015e-05, 'std': 0.0002196, 'q1': 4e-08, 'median': 3.2e-07, 'q3': 1.08e-06}
	Local search 1/96, x=[0.00768599 0.40740364 0.30417016 0.13670675 0.32957529 0.73775517], val=0.0538679814358783, best=0.0538679814
	Local search 3/96, x=[0.0076867  0.40740356 0.3041702  0.13670677 0.32957782 0.73775392], val=0.053867981459006176, best=0.0538679815
	Local search 4/96, x=[0.00768702 0.40740287 0.30416937 0.13670626 0.32957687 0.73775489], val=0.05386798145958714

In [15]:
# F8: merge the initial samples with the submissions to date, then propose next points
f8_Inputs, f8_Outputs = combineInitialDataAndSubmissionsToDate(functionNo=8)
findNextGuess(functionName='F8', f_Inputs=f8_Inputs, f_Outputs=f8_Outputs)

********************
F8  -  8 D
********************
 Inputs -   Min: [0.00907698 0.0034195  0.02292868 0.         0.00964888 0.02211341
 0.03590888 0.04195607] , Max:  [0.98594539 0.97397979 0.9988855  0.90298577 0.986902   0.99024381
 0.99291449 0.9887551 ]
 Output -   Min: 5.5921933895401965 , Max (Best):  9.838102201175
Starting 8D Bayes Optimisation from 42 known samples

Using Expected Improvement Acquisition function with xi = 0
	131072 Sobol candidates, best 128 to be minimised in local search
	Initial acquisition function stats {'min': 0.0, 'max': 0.18968206, 'mean': 0.00118948, 'std': 0.00779029, 'q1': 0.0, 'median': 0.0, 'q3': 1e-08}
	Local search 1/128, x=[0.02435693 0.29448964 0.02852134 0.25498941 0.62167214 0.56570594
 0.12959998 0.52210154], val=0.22975902676624735, best=0.2297590268
	Local search 7/128, x=[0.02435792 0.29448922 0.02852229 0.254998   0.62166198 0.56570987
 0.12960255 0.52209477], val=0.2297590267750508, best=0.2297590268
	Local search 10/128, x=[0.02435

In [16]:
# Legend: the number in front of each proposal below is its run position
# (EI runs in XIS order first, then UCB runs in KAPPAS order).
print(f'0 - Xi {XI_EXPLOIT} exploit')
print(f'1 - Xi {XI_SMALL} small explore')
print(f'2 - Xi {XI_MODERATE} medium explore')  # fixed: a space was missing before "medium"
print(f'3 - Xi {XI_HIGH} higher Explore')
print(f'4 - UCB k {KAPPA_EXPLOIT} exploit')
print(f'5 - UCB k {KAPPA_BALANCED} balanced')
print(f'6 - UCB k {KAPPA_EXPLORE} medium explore')
print(f'7 - UCB k {KAPPA_EXTREME_EXPLORE} extreme Explore')

# Copy the collected proposals, keeping each hyphen-separated string as-is
formatted_data = {fname: dict(values) for fname, values in runResults.items()}

# Print in a readable format: one section per function, one line per run
for fname, values in formatted_data.items():
    print(f"\n{fname}:")
    for k, vec_str in values.items():
        print(f"{k}: {vec_str}")

0 - Xi 0 exploit
1 - Xi 0.1 small explore
2 - Xi 0.5medium explore
3 - Xi 20 higher Explore
4 - UCB k 0.1 exploit
5 - UCB k 5 balanced
6 - UCB k 10 medium explore
7 - UCB k 20 extreme Explore

F1:
0: 0.431029-0.814368
1: 0.431029-0.814368
2: 0.431029-0.814368
3: 0.431029-0.814368
4: 0.410574-0.147438
5: 0.774176-0.811360
6: 0.793740-0.817773
7: 0.885607-0.579488

F2:
0: 0.685034-0.917118
1: 0.674744-0.914386
2: 0.431029-0.814368
3: 0.431029-0.814368
4: 0.431029-0.814368
5: 0.644053-0.932286
6: 0.685793-0.919272
7: 0.431029-0.814368

F3:
0: 0.431029-0.814368-0.806413
1: 0.754344-1.000000-0.000000
2: 0.994571-0.993175-0.003964
3: 0.431029-0.814368-0.806413
4: 0.431029-0.814368-0.806413
5: 0.431029-0.814368-0.806413
6: 1.000000-1.000000-0.000000
7: 1.000000-0.000000-0.756420

F4:
0: 0.407603-0.411492-0.350202-0.437970
1: 0.415792-0.412454-0.321259-0.438842
2: 0.428170-0.416473-0.274938-0.438868
3: 0.006332-0.146355-0.995860-0.934832
4: 0.408086-0.411314-0.368519-0.433981
5: 0.000000-0.000