In [64]:
import pandas as pd
import os
import glob
import yaml

In [69]:
def mahan(values, means):
    '''Squared Mahalanobis-style distance of values from means.

    Every dimension is scored independently (no covariance terms) using a
    standard deviation of mean/60, so a deviation of 5% of the mean is 3 SDs.

    values -- sequence of observed or predicted quantities, paired by position
              with means (plain numbers and numpy arrays both work)
    means  -- sequence of target quantities, one per entry of values

    Returns the sum over dimensions of (value - mean)**2 / variance; no square
    root is taken. A mean of 0 gives a zero variance, so the result is
    undefined (ZeroDivisionError for plain Python floats).

    Raises ValueError if values and means differ in length.
    '''
    if len(values) != len(means):
        raise ValueError(
            "values and means must have the same length, got {} and {}".format(
                len(values), len(means)))
    # variance = (mean/60)**2; the expression is kept in its original form so
    # results stay numerically identical.
    variances = [(1./3600)*(m**2) for m in means]
    squared_errors = [((v - m)**2)/var
                      for v, m, var in zip(values, means, variances)]
    return sum(squared_errors)

In [40]:
'''
Gaussian Process associated functions
'''
import numpy as np
from skopt.learning import GaussianProcessRegressor
from skopt.learning.gaussian_process.kernels import Matern, ConstantKernel, WhiteKernel

def create_gp(n_dims=3, random_state=None):
    '''Create an unfitted GP regressor: amplitude * Matern(nu=2.5) + white noise.

    n_dims       -- number of input dimensions; the Matern kernel gets one
                    length scale per dimension, each starting at 1.0
    random_state -- optional seed forwarded to the regressor so the
                    hyperparameter-optimiser restarts can be made repeatable;
                    None (the default) keeps the previous unseeded behaviour

    Returns a GaussianProcessRegressor that still has to be fitted.
    '''
    cov_amplitude = ConstantKernel(1.0, (0.01, 1e8))
    matern_kernel = Matern(
        length_scale=np.ones(n_dims),
        length_scale_bounds=[(0.01, 1e8)]*n_dims, nu=2.5)

    # The white-noise term lets the fit attribute part of the variance to noise.
    kernel = cov_amplitude * matern_kernel + WhiteKernel(noise_level_bounds=(1e-10, 1e2))
    gp = GaussianProcessRegressor(
        kernel,
        n_restarts_optimizer=5,
        optimizer='fmin_l_bfgs_b',
        normalize_y=True,
        random_state=random_state
    )
    return gp


In [86]:
class Problem:
    '''Empty base class for problem definitions; subclasses supply all behaviour.'''


class SurrogateSimulationProblem(Problem):
    '''Objective built from GP surrogates fitted to stored trial results.

    One GP is fitted per output column of the '*res-full.csv' file found in
    folder. evaluate() samples every GP at a point and scores the samples
    against the targets stored under 'actuals' in the folder's '*.yml' config.
    '''

    def __init__(self, folder, inputs, outputs, output_actuals_names):
        '''Load the trial results and config from folder and fit one GP per output.

        folder               -- directory holding the results CSV and YAML config
        inputs               -- list of results-CSV column names used as GP inputs
        outputs              -- list of results-CSV column names, one GP each
        output_actuals_names -- keys into config['actuals'], paired by position
                                with outputs

        Raises ValueError if outputs and output_actuals_names differ in length,
        and FileNotFoundError if either file is missing from folder.
        '''
        # Checked before any file I/O: a mismatch would otherwise only surface
        # later, when predictions are paired with actuals during scoring.
        if len(outputs) != len(output_actuals_names):
            raise ValueError(
                "outputs and output_actuals_names must have the same length, "
                "got {} and {}".format(len(outputs), len(output_actuals_names)))
        self.folder = folder
        self.results_df = self.load_trial_results()
        self.config = self.load_yaml()
        self.inputs = inputs
        self.outputs = outputs
        self.output_actuals_names = output_actuals_names
        self.gps = [self.create_gp(o) for o in self.outputs]

    def _first_match(self, pattern):
        '''Return the alphabetically first path in self.folder matching pattern.

        Sorting makes the choice repeatable when several files match, since
        glob does not promise any particular order.
        Raises FileNotFoundError if nothing matches.
        '''
        matches = sorted(glob.glob(os.path.join(self.folder, pattern)))
        if not matches:
            raise FileNotFoundError(
                "No file matching '{}' found in '{}'".format(pattern, self.folder))
        return matches[0]

    def load_trial_results(self):
        '''Read the folder's '*res-full.csv' file into a DataFrame.'''
        return pd.read_csv(self._first_match('*res-full.csv'))

    def load_yaml(self):
        '''Parse the folder's '*.yml' file with yaml.safe_load and return the result.'''
        with open(self._first_match('*.yml'), 'r', encoding='utf-8') as stream:
            return yaml.safe_load(stream)

    def create_gp(self, output_name):
        '''Fit a fresh GP mapping the input columns to the output_name column.

        Only rows whose 'row-type' column equals 'actual' are used for fitting.
        '''
        gp = create_gp(n_dims=len(self.inputs))
        # Filter once and reuse the subset for both inputs and target.
        actual_rows = self.results_df[self.results_df['row-type'] == 'actual']
        gp.fit(actual_rows[list(self.inputs)].values, actual_rows[output_name].values)
        return gp

    def evaluate(self, X, verbose=True):
        '''Score one posterior sample from every GP at X against the actuals.

        X       -- 2-D array of query points, one column per input
                   (the notebook passes a single row)
        verbose -- when True (the default) print the sampled predictions and the
                   actuals, as before; pass False to silence the output

        sample_y is called with random_state=None, so repeated calls return
        different draws and therefore different scores.

        Returns whatever mahan() yields for the samples; for a single-row X the
        notebook output shows a (1, 1) array.
        '''
        predictions = [gp.sample_y(X, random_state = None) for gp in self.gps]
        actuals = [self.config['actuals'][oan] for oan in self.output_actuals_names]

        if verbose:
            print("predictions: {}, actuals: {}".format(predictions, actuals))

        return mahan(predictions, actuals)
    


In [87]:
# Folder holding the calibration run's '*res-full.csv' and '*.yml' files.
# Set the CALIBRATION_FOLDER environment variable to point the notebook at
# another location without editing this cell; the original path is the default.
folder_path = os.environ.get(
    "CALIBRATION_FOLDER",
    r"C:\Users\hannag01\OneDrive - NHS Scotland\Whole System Modelling\Calibration\Highland\Highland-2024-07-10",
)

In [88]:
# Build the surrogate problem: three input columns, two output columns, and
# each output paired by position with an entry under 'actuals' in the config.
p = SurrogateSimulationProblem(
    folder_path,
    inputs=['dd-los', 'gw-non-covid-los', 'syswatch-scale'],
    outputs=['emergency-output', 'delay-output'],
    output_actuals_names=['emergency', 'delays'],
)

In [90]:
# Single query point as a (1, 3) row, one value per input column in the order
# they were passed as inputs to the problem.
test_X_min = np.array([[73.92173248, 5, 0.75]])

In [91]:
# evaluate() samples the GPs without a fixed seed, so every call returns a
# different score; ten repeats at the same point show how much it varies.
for i in range(10):
    print(p.evaluate(test_X_min))

predictions: [array([[391.55551283]]), array([[101.02608642]])], actuals: [381.0, 104.0]
[[5.706886]]
predictions: [array([[387.81881737]]), array([[102.24619276]])], actuals: [381.0, 104.0]
[[2.17687307]]
predictions: [array([[387.81667654]]), array([[97.99184584]])], actuals: [381.0, 104.0]
[[13.16722489]]
predictions: [array([[392.23887311]]), array([[105.66063773]])], actuals: [381.0, 104.0]
[[4.05042993]]
predictions: [array([[394.3566917]]), array([[102.64048633]])], actuals: [381.0, 104.0]
[[5.03954003]]
predictions: [array([[390.65523557]]), array([[104.02965449]])], actuals: [381.0, 104.0]
[[2.31224196]]
predictions: [array([[383.95961428]]), array([[102.08358647]])], actuals: [381.0, 104.0]
[[1.43963412]]
predictions: [array([[390.72472228]]), array([[99.68012903]])], actuals: [381.0, 104.0]
[[8.55657281]]
predictions: [array([[394.79887662]]), array([[101.51985728]])], actuals: [381.0, 104.0]
[[6.76948875]]
predictions: [array([[385.85426887]]), array([[103.34943071]])], act