In [14]:
import numpy as np
import os
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from scipy.optimize import minimize
from scipy.stats import norm
import warnings

In [15]:
# Suppress GPR numerical warnings for cleaner output
warnings.filterwarnings("ignore", category=UserWarning)

In [16]:
# --- CONFIGURATION ---
BASE_DIR = 'initial_data'
N_FUNCTIONS = 8
BETA_EXPLORATION = 2.5 # High Beta (>= 2.5) for aggressive exploration in Wk 1


In [17]:
# --- CORE FUNCTIONS ---

def load_function_data(function_id, base_dir):
    """Loads X (inputs) and Y (outputs) for a single function."""
    folder_name = f'function_{function_id}'
    folder_path = os.path.join(base_dir, folder_name)
    
    X = np.load(os.path.join(folder_path, 'initial_inputs.npy'))
    Y = np.load(os.path.join(folder_path, 'initial_outputs.npy'))
    
    # Check dimensionality from the loaded data
    dim = X.shape[1]
    
    # Ensure Y is 1D (sometimes loaded as (N, 1))
    if Y.ndim > 1:
        Y = Y.flatten()
        
    return X, Y, dim

def ucb_acquisition(x, gp, Y_max, beta):
    """
    Upper Confidence Bound (UCB) acquisition function
    
    A high 'beta' encourages exploration (prioritises uncertainty).
    Negate the result because using a minimiser to find the max.
    """
    x = x.reshape(1, -1)
    # Get the mean (mu) and std (sigma) from the GP
    mu, sigma = gp.predict(x, return_std=True)
    
    # Calculate UCB: mu + sqrt(beta) * sigma
    ucb = mu + np.sqrt(beta) * sigma
    
    return -ucb[0] # Return the negative UCB for minimisation

def format_submission_string(x_best):
    """Formats the input vector x to the required 'x1-x2-...-xn' string"""
    # Round each coordinate to six decimal places and join with '-'
    x_formatted = [f"{coord:.6f}" for coord in x_best]
    return "-".join(x_formatted)


In [18]:
# --- MAIN EXECUTION LOOP ---
submission_queries = {} # Dictionary to hold all 8 query strings

for i in range(1, N_FUNCTIONS + 1):
    print(f"\n--- Processing Function {i} ---")
    
    # 1. Load Data
    X, Y, D = load_function_data(i, BASE_DIR)
    Y_max = np.max(Y)
    
    print(f"Dimensions: {D}-D. Current Y_max: {Y_max:.4f}")
    
    # 2. Build and Train GP
    # Matern kernel is used due to its flexibility.
    # length_scale_bounds='fixed' prevents optimisation for speed
    # may remove this for better performance later.
    kernel = Matern(length_scale=np.ones(D), length_scale_bounds=(1e-1, 1e2), nu=2.5)
    gp = GaussianProcessRegressor(
        kernel=kernel, alpha=1e-6, n_restarts_optimizer=20, random_state=42)
    gp.fit(X, Y)
    
    # 3. Define the Optimisation Task
    # Bounds for all functions are assumed to be [0.0, 1.0] for all dimensions.
    bounds = [(0.0, 1.0)] * D
    
    # Set the UCB exploration parameter based on dimensionality (Strategy: High Beta)
    # Increase Beta for higher dimensions where data is sparser.
    current_beta = BETA_EXPLORATION
    if D >= 5:
        current_beta = BETA_EXPLORATION * 1.25 # More aggressive exploration for high-D
        
    # The objective function (to be minimse) is the negative UCB
    ucb_objective = lambda x: ucb_acquisition(x, gp, Y_max, current_beta)
    
    # 4. Find the Argmax (i.e., minimise the negative UCB)
    # Use multiple random starts to avoid local minima in the acquisition function
    best_ucb_value = np.inf
    x_next = None
    
    for _ in range(20*D): # 20*D random restarts for better robustness
        x0 = np.random.uniform(0, 1, D)
        
        # Use a general minimiser (L-BFGS-B is good for bounded problems)
        res = minimize(ucb_objective, x0, bounds=bounds, method='L-BFGS-B')
        
        if res.fun < best_ucb_value:
            best_ucb_value = res.fun
            x_next = res.x
            
    # 5. Format and Store Result
    if x_next is not None:
        submission_string = format_submission_string(x_next)
        submission_queries[f'Function {i}'] = submission_string
        print(f"Query for F{i}: {submission_string}")
        print(f"Strategy: UCB with Beta={current_beta:.2f} (Exporation)")
    else:
        submission_queries[f'Function {i}'] = 'ERROR: Rerun'
        print("ERROR: Optimization failed to find a point.")
        


--- Processing Function 1 ---
Dimensions: 2-D. Current Y_max: 0.0000
Query for F1: 0.000000-0.000000
Strategy: UCB with Beta=2.50 (Exporation)

--- Processing Function 2 ---
Dimensions: 2-D. Current Y_max: 0.6112
Query for F2: 1.000000-0.000000
Strategy: UCB with Beta=2.50 (Exporation)

--- Processing Function 3 ---
Dimensions: 3-D. Current Y_max: -0.0348
Query for F3: 1.000000-1.000000-0.837688
Strategy: UCB with Beta=2.50 (Exporation)

--- Processing Function 4 ---
Dimensions: 4-D. Current Y_max: -4.0255
Query for F4: 0.531039-0.401922-0.000000-0.276120
Strategy: UCB with Beta=2.50 (Exporation)

--- Processing Function 5 ---
Dimensions: 4-D. Current Y_max: 1088.8596
Query for F5: 0.232921-0.841389-0.883358-0.879468
Strategy: UCB with Beta=2.50 (Exporation)

--- Processing Function 6 ---
Dimensions: 5-D. Current Y_max: -0.7143
Query for F6: 0.312024-0.000000-0.393987-1.000000-0.000000
Strategy: UCB with Beta=3.12 (Exporation)

--- Processing Function 7 ---
Dimensions: 6-D. Current Y_

In [19]:
# --- FINAL OUTPUT ---
print("\n==============================")
print("== WEEK 1 SUBMISSION QUERIES ==")
print("===============================")
for f_name, query in submission_queries.items():
    print(f"{f_name}: {query}")


== WEEK 1 SUBMISSION QUERIES ==
Function 1: 0.000000-0.000000
Function 2: 1.000000-0.000000
Function 3: 1.000000-1.000000-0.837688
Function 4: 0.531039-0.401922-0.000000-0.276120
Function 5: 0.232921-0.841389-0.883358-0.879468
Function 6: 0.312024-0.000000-0.393987-1.000000-0.000000
Function 7: 0.000000-0.207680-1.000000-0.000000-0.323235-1.000000
Function 8: 0.140670-0.101576-0.175983-0.000000-1.000000-0.408319-0.145334-0.000000
