In [2]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from scipy.stats.qmc import LatinHypercube

from duqling_py import Duqling
from kernel_search import search_kernel, build_kernel

In [3]:
# Module-level Duqling handle; get_data() reads this global to call quack() and duq().
duqling = Duqling()

In [4]:
def lhs_array(n: int, d: int, ranges: np.ndarray | None = None, seed: int | None = None) -> np.ndarray:
    sampler = LatinHypercube(d, seed=seed)
    samples = sampler.random(n)
    if ranges is not None:
        samples = ranges[:, 0] + samples * (ranges[:, 1] - ranges[:, 0])
    return samples

In [5]:
def get_data(fname, n_samples=1000, seeds=(31, 41, 59)):
    """Build standardized train/validation/test splits for a duqling function.

    Three independent Latin hypercube designs are drawn over the function's
    input range, the function is evaluated on each (unscaled) design, and the
    inputs are then standardized with a scaler fitted on the training design
    only.

    Parameters
    ----------
    fname : str
        Function name passed to ``duqling.quack`` and ``duqling.duq``.
    n_samples : int, default 1000
        Number of points in each of the three designs.
    seeds : sequence of three ints, default (31, 41, 59)
        Seeds for the train, validation and test designs, in that order.

    Returns
    -------
    list of tuple
        ``[(X_tr, y_tr), (X_va, y_va), (X_te, y_te)]`` with standardized
        inputs and the responses as returned by ``duqling.duq``.
    """
    func_info = duqling.quack(fname)
    input_dim = func_info["input_dim"]
    input_range = func_info["input_range"]

    # Exactly three seeds are required: one per split.
    seed_tr, seed_va, seed_te = seeds

    X_tr = lhs_array(n_samples, input_dim, input_range, seed=seed_tr)
    X_va = lhs_array(n_samples, input_dim, input_range, seed=seed_va)
    X_te = lhs_array(n_samples, input_dim, input_range, seed=seed_te)

    # Responses are evaluated on the inputs in their native range (scale01=False),
    # i.e. before any standardization is applied.
    y_tr = duqling.duq(X_tr, fname, scale01=False)
    y_va = duqling.duq(X_va, fname, scale01=False)
    y_te = duqling.duq(X_te, fname, scale01=False)

    # Fit the scaler on the training inputs only, then reuse it for the other
    # splits so no validation/test statistics leak into the preprocessing.
    scaler = StandardScaler()
    X_tr = scaler.fit_transform(X_tr)
    X_va = scaler.transform(X_va)
    X_te = scaler.transform(X_te)

    return [(X_tr, y_tr), (X_va, y_va), (X_te, y_te)]

In [6]:
# Build the train/validation/test splits for the 'banana' function.
# NOTE(review): X_va / y_va are not used anywhere in this cell.
(X_tr, y_tr), (X_va, y_va), (X_te, y_te) = get_data('banana')

# Search for a kernel on the training split, then fit a GP that uses it.
kernel = search_kernel(X_tr, y_tr)
gp = GaussianProcessRegressor(kernel=build_kernel(kernel), normalize_y=True)
gp.fit(X_tr, y_tr)

# mean_squared_error takes (y_true, y_pred); keep that order so the call stays
# correct if the metric is later swapped for a non-symmetric one.
y_pred_te = gp.predict(X_te)
print(f"Test RMSE: {np.sqrt(mean_squared_error(y_te, y_pred_te)):.4f}")

ABNORMAL: .

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Test RMSE: 0.0121


ABNORMAL: .

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)
