In [1]:
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.colors import LogNorm
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

In [2]:
data = np.load('us_topo.npy')
X, y = data[:,:-1], data[:,-1]

In [3]:
from dask.distributed import Client

In [4]:
client = Client(scheduler_file='scheduler.json')

In [5]:
client

0,1
Client  Scheduler: tcp://10.128.0.203:44723  Dashboard: http://10.128.0.203:8787/status,Cluster  Workers: 48  Cores: 3072  Memory: 6.08 TB


In [6]:
state = np.random.get_state()

In [7]:
np.random.shuffle(X)

In [8]:
np.random.set_state(state)

In [9]:
np.random.shuffle(y)

In [10]:
from fit import fit
GaussianProcessRegressor.fit = fit

In [11]:
client.scatter(X, broadcast=True)

In [12]:
client.scatter(y, broadcast=True)

In [13]:
hgdl_GPs = GaussianProcessRegressor(kernel=1.0*RBF(1.0), optimizer='hgdl', random_state=42).fit(
    X[:1000], y[:1000], client=client, num_individuals=3, num_epochs=5, max_local=2, r=3.0)

after get latest {'best_x': array([7.33712987, 0.47512307]), 'best_y': 235021.96310341082, 'minima_x': array([], shape=(0, 2), dtype=float64), 'minima_y': array([], dtype=float64), 'global_x': array([[ 7.33712987,  0.47512307],
       [ 6.30807114, -1.40737593],
       [ 8.25702225,  4.54456682],
       [-1.81869676,  5.94728829],
       [-9.34441189, 10.95160937]]), 'global_y': array([2.35021963e+05, 9.03348056e+05, 3.42994561e+17, 6.05425434e+17,
       2.02087439e+18])}


In [14]:
hgdl_GPs

[GaussianProcessRegressor(kernel=39.2**2 * RBF(length_scale=1.61),
                          random_state=42),
 GaussianProcessRegressor(kernel=23.4**2 * RBF(length_scale=0.245),
                          random_state=42),
 GaussianProcessRegressor(kernel=62.1**2 * RBF(length_scale=94.1),
                          random_state=42),
 GaussianProcessRegressor(kernel=0.403**2 * RBF(length_scale=383),
                          random_state=42),
 GaussianProcessRegressor(kernel=0.00935**2 * RBF(length_scale=5.7e+04),
                          random_state=42)]

In [15]:
# print_GPs(hgdl_GPs, 'hgdl')

In [16]:
GPs = hgdl_GPs

In [17]:
likelihoods = [x.log_marginal_likelihood_value_ for x in GPs]

In [18]:
likelihoods

[-235021.96310341082,
 -903348.056128845,
 -3.4299456128650643e+17,
 -6.054245656818971e+17,
 -2.0208743856350892e+18]

In [19]:
[z.score(X,y) for z in GPs]

[0.19539185761158795,
 -0.7859099780804824,
 0.8114774236818613,
 0.7917479953199635,
 0.23843486816673043]

In [20]:
with open('GPs.pkl', 'wb') as file:
    from pickle import dump
    dump(GPs, file)