In [None]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell executes so source edits are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2
# Turn off Jedi for IPython tab completion.
%config Completer.use_jedi = False

In [None]:
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
class NullClass:
    """No-op stand-in used for ``adj`` when ``mpl_utils`` cannot be imported.

    Every attribute that is not defined explicitly resolves to
    :meth:`do_nothing`, so arbitrary method calls on the stand-in are
    accepted and silently ignored.
    """

    def do_nothing(*args, **kwargs):
        """Accept any arguments (``self`` included) and return ``None``."""
        return None

    def add_colorbar(self, im, **kwargs):
        """Return ``plt.colorbar(im)``; extra keyword arguments are ignored."""
        colorbar = plt.colorbar(im)
        return colorbar

    def __getattr__(self, _name):
        """Fallback lookup: any unknown attribute is the no-op method."""
        return self.do_nothing


# Use the real helper when it is importable; otherwise fall back to the
# no-op stand-in so later ``adj.<anything>(...)`` calls still work.
try:
    from mpl_utils import MPLAdjutant
    adj = MPLAdjutant()
    adj.set_defaults()
except ImportError:
    adj = NullClass()

In [None]:
from easygp import DEBUG, DISABLE_DEBUG, logger, disable_logger
from easygp.gp import Campaign, AutoscalingGaussianProcessRegressor, MultiCampaign
from easygp.policy import MaxVariancePolicy, ExpectedImprovementPolicy, ExploitationTargetPolicy

In [None]:
# Call easygp's DEBUG helper (presumably switches the library's logger to
# debug verbosity — confirm against easygp).
DEBUG()

# 1D input testing

In [None]:
def get_input_data(seed=127, N=1000, Nsmall=10):
    """Build a small synthetic 1D data set with two targets.

    ``Nsmall`` distinct positions are drawn without replacement from an
    ``N``-point grid on [-20, 50] and sorted; the first and last sampled
    positions are then overwritten with the end points -20 and 50.

    Side effect: seeds NumPy's *global* random state with ``seed``.

    Parameters
    ----------
    seed : int
        Seed passed to ``np.random.seed``.
    N : int
        Number of points in the dense grid.
    Nsmall : int
        Number of sampled training points.

    Returns
    -------
    grid : ndarray, shape (N,)
        Dense grid on [-20, 50].
    X : ndarray, shape (Nsmall, 1)
        Sampled feature values.
    y : ndarray, shape (Nsmall, 2)
        Both target functions evaluated at ``X``.
    alpha : ndarray, shape (Nsmall, 2)
        Noise/uncertainty values, one column per target.
    """

    def truth(x):
        # Linear upwards trend plus a sinusoid.
        return x + np.sin(x) * 2.345

    def truth2(x):
        # Absolute-value trend minus the same sinusoid.
        return np.abs(x) - np.sin(x) * 2.345

    np.random.seed(seed)
    idx = np.sort(np.random.choice(N, Nsmall, replace=False))

    grid = np.linspace(-20, 50, N)

    # Feature data, with the two extreme samples pinned to the grid limits.
    X = grid[idx]
    X[0] = -20.0
    X[-1] = 50.0

    # Noise/uncertainty: a parabola in the sampled index position, offset per
    # target. These values are taken at the sampled indices, i.e. they are
    # not recomputed for the two pinned end points.
    parabola = (np.linspace(-2, 2, N)[idx])**2
    alpha = np.vstack((parabola + 0.1, parabola + 0.2)).T

    # Target data, one column per truth function.
    y = np.vstack((truth(X), truth2(X))).T

    return grid, X[:, None], y, alpha

## Standalone GP

In [None]:
# Generate the synthetic 1D data set with the default seed and sizes.
grid, X, y, alpha = get_input_data()

In [None]:
# Regressor with a single feature bounded to [-20, 50] (the same range the
# grid spans) and two targets. `alpha` is passed as the third positional
# argument of fit (presumably per-point noise — confirm against easygp).
gp = AutoscalingGaussianProcessRegressor(bounds=[(-20, 50)], n_targets=2)
gp.fit(X, y, alpha)

In [None]:
# Predict on the dense grid; predict returns two values, unpacked as mu and std.
mu, std = gp.predict(grid)

In [None]:
# Draw one sample on the full grid and one on every 40th grid point, passing
# the same randomstate to both calls.
mu_sampled = gp.sample_y_reproducibly(grid, n_samples=1, randomstate=124)
mu_sampled2 = gp.sample_y_reproducibly(grid[::40], n_samples=1, randomstate=124)

In [None]:
# One panel per target: predicted mu (black), one sampled curve (green), the
# mu +/- std band (red) and the training points (blue).
#
# No `plt.clf()` before `plt.subplots`: with no figure open, clf() creates an
# implicit empty figure just to clear it, which then lingers as a stray blank
# figure next to the real one.
fig, axs = plt.subplots(1, 2, figsize=(6, 2), sharex=True)

for ii in range(y.shape[1]):
    ax = axs[ii]
    adj.set_grids(ax, grid=False)
    ax.plot(grid, mu[:, ii], 'k-')
    ax.plot(grid, mu_sampled[:, ii], 'g-', linewidth=0.5)
#     ax.plot(grid[::40], mu_sampled2[:, ii], 'c-', linewidth=0.5)
    band_lower = (mu[:, ii] - std[:, ii]).squeeze()
    band_upper = (mu[:, ii] + std[:, ii]).squeeze()
    ax.fill_between(grid, band_lower, band_upper, color="red", alpha=0.5)
    ax.scatter(X.squeeze(), y[:, ii], color='blue', s=4)

plt.show()

## Policy/campaign testing

In [None]:
# Call easygp's DISABLE_DEBUG helper (presumably reverts the debug logging
# enabled earlier — confirm against easygp).
DISABLE_DEBUG()

In [None]:
# Regenerate the synthetic 1D data (default seed and sizes) for the campaign
# tests; this rebinds grid, X, y and alpha.
grid, X, y, alpha = get_input_data()

In [None]:
# Policy object handed to the campaigns below.
policy = MaxVariancePolicy()
# Disabled alternative. NOTE(review): HybridExploitExplorePolicy is not among
# the names imported from easygp.policy in this notebook.
# policy = HybridExploitExplorePolicy(percent_explore=90)
# policy.set_target(np.array([-25.0, 50.0]))
# Two values, matching the two target columns of y (presumably per-target
# weights — confirm against easygp).
policy.set_weight(np.array([0.1, 1.0]))

In [None]:
# Single campaign on the 1D data with the policy configured above; no
# randomstate is passed here.
campaign = Campaign(X, y, alpha, bounds=[(-20, 50)], policy=policy)

In [None]:
# Run the campaign with argument 10 (presumably the number of iterations —
# confirm against easygp) and keep whatever run() returns.
performance = campaign.run(10)

### Multi-campaign testing

In [None]:
# Twelve campaigns on the same data that differ only in randomstate (0..11).
# NOTE(review): every campaign receives the same `policy` instance and the
# same X/y/alpha arrays — confirm Campaign does not mutate them.
campaigns = [Campaign(X, y, alpha, bounds=[(-20, 50)], policy=policy, randomstate=ii) for ii in range(12)]

In [None]:
# Wrap the list of campaigns in a MultiCampaign.
multicampaign = MultiCampaign(campaigns)

In [None]:
# Run with argument 12 and n_jobs=6 (presumably iterations and parallel
# workers — confirm against easygp).
results = multicampaign.run(12, n_jobs=6)

In [None]:
# Inspect the private `_iteration` attribute of element [1] of the tenth
# entry in `results`.
results[9][1]._iteration

In [None]:
# Same private attribute, read from one of the original campaign objects for
# comparison with the value taken from `results`.
campaigns[3]._iteration

# 2D test

Reference for the `imshow` usage below: https://www.geeksforgeeks.org/matplotlib-pyplot-imshow-in-python/

In [None]:
def grids_to_coordinates(grids):
    """Expand per-axis 1D grids into an array of mesh coordinates.

    Parameters
    ----------
    grids : sequence of 1D arrays
        One array of axis values per dimension.

    Returns
    -------
    ndarray, shape (n_points, n_dims)
        One row per point of ``np.meshgrid(*grids)`` (default indexing),
        in flattened mesh order.
    """
    mesh = np.meshgrid(*grids)
    columns = [np.ravel(axis_values) for axis_values in mesh]
    return np.transpose(np.array(columns))

In [None]:
np.random.seed(126)
N = 100
M = 150

# Axis values of the 2D grid: N points along x, M points along y.
grid_x = np.linspace(-4, 5, N)
grid_y = np.linspace(-5, 4, M)

# 20 distinct flat indices into the N*M mesh, in ascending order.
idx = np.sort(np.random.choice(N * M, 20, replace=False))

# Feature data: the (x, y) coordinates of the selected mesh points.
g1, g2 = np.meshgrid(grid_x, grid_y)
X = np.vstack((g1.ravel(), g2.ravel())).T
X = X[idx, :]

X_original = X.copy()

# Noise/uncertainty: the same small constant for every sample.
alpha = np.full(len(X), 1e-5)


def func(x, y):
    """Analytic 2D test surface: a polynomial-times-Gaussian term plus a
    second Gaussian centred at (2, -4)."""
    central = (1 - x / 3. + x ** 5 + y ** 5) * np.exp(-x ** 2 - y ** 2)
    offset_bump = np.exp(-(x - 2)**2 - (y + 4)**2)
    return central + offset_bump


def truth(X):
    """Evaluate ``func`` on an (n, 2) array whose columns are x and y."""
    return func(X[:, 0], X[:, 1])


def truth_meshgrid(x, y):
    """Evaluate ``func`` on the outer grid of 1D ``x`` and ``y``.

    ``x`` becomes a column and ``y`` a row, so the result has shape
    (len(x), len(y)).
    """
    return func(x.reshape(-1, 1), y.reshape(1, -1))


y = truth(X)  # Target data


In [None]:
# (x, y) coordinates of every point of the grid_x by grid_y mesh, one row per
# point; this rebinds `grid`, previously the 1D grid.
grid = grids_to_coordinates([grid_x, grid_y])

In [None]:
# 2D campaign: `y[:, None]` turns the 1D targets into a single column, and
# there is one bounds pair per feature (x in [-4, 5], y in [-5, 4]).
campaign = Campaign(X, y[:, None], alpha, bounds=[(-4, 5), (-5, 4)], policy=MaxVariancePolicy(), randomstate=124)
# Prediction on the full grid, taken before the campaign is run.
mu, std = campaign.gp.predict(grid)

In [None]:
# Run the 2D campaign with argument 10 and keep whatever run() returns.
performance = campaign.run(10)
# Disabled post-run evaluation. With these lines commented out, `mu2`/`std2`
# are never defined and `truth` is never rebound to an array.
# mu2, std2 = campaign.gp.predict(grid)
# truth = campaign.truth(grid)
# mu = mu.reshape(M, N)
# std = std.reshape(M, N)
# mu2 = mu2.reshape(M, N)
# std2 = std2.reshape(M, N)

In [None]:
# NOTE(review): in a top-to-bottom run `truth` is still the function defined
# with the 2D data (its rebinding to an array is commented out), so this
# displays a function repr rather than data.
truth

In [None]:
# Analytic function on the mesh; shape (N, M), so it is transposed for imshow
# (rows must run along y).
z = truth_meshgrid(grid_x, grid_y)
# Symmetric colour limits shared by the panels.
z_max = np.abs(z).max()
z_min = -z_max

# The campaign's truth callable evaluated on the grid. The bare name `truth`
# is the analytic *function* and has no `.reshape`, so calling
# `truth.reshape(M, N)` raises AttributeError on a fresh run; evaluate into a
# separate array instead. `grid` is the flattened (M, N) mesh, hence the
# reshape target (assumes one value per grid point — confirm against easygp).
truth_on_grid = np.asarray(campaign.truth(grid)).reshape(M, N)

fig, axs = plt.subplots(1, 4, figsize=(6, 6), sharey=True, sharex=True)

ax = axs[0]
c = ax.imshow(
    z.T, cmap='rainbow', vmin=z_min, vmax=z_max,
    extent=[grid_x.min(), grid_x.max(), grid_y.min(), grid_y.max()],
    interpolation ='nearest', origin ='lower'
)
adj.set_grids(ax, grid=False)
ax.set_title("Function")

ax = axs[1]
c = ax.imshow(
    truth_on_grid, cmap='rainbow', vmin=z_min, vmax=z_max,
    extent=[grid_x.min(), grid_x.max(), grid_y.min(), grid_y.max()],
    interpolation ='nearest', origin ='lower'
)
adj.set_grids(ax, grid=False)
ax.set_title("'Truth'")

# The two remaining panels are disabled; they need `mu2`/`std2`, which are
# not defined unless the post-run prediction is re-enabled.
# ax = axs[2]
# c = ax.imshow(
#     mu2, cmap='rainbow', vmin=z_min, vmax=z_max,
#     extent=[grid_x.min(), grid_x.max(), grid_y.min(), grid_y.max()],
#     interpolation ='nearest', origin ='lower'
# )
# adj.set_grids(ax, grid=False)
# ax.scatter(campaign.X[:, 0], campaign.X[:, 1], s=0.3, color="black")
# ax.scatter(X_original[:, 0], X_original[:, 1], s=0.3, color="blue")
# ax.set_title("GP")

# ax = axs[3]
# c = ax.imshow(
#     std2, cmap='viridis', vmin=z_min, vmax=z_max,
#     extent=[grid_x.min(), grid_x.max(), grid_y.min(), grid_y.max()],
#     interpolation ='nearest', origin ='lower'
# )
# ax.scatter(campaign.X[:, 0], campaign.X[:, 1], s=0.1, color="black")
# ax.set_title("Var")

adj.set_grids(ax, grid=False)
plt.show()

# Real data

In [None]:
# Location of the sample spreadsheet, relative to the user's home directory.
root = Path.home() / "Data/22_nanoparticle/2022_04_14"

# Read the first three sheets (returned as a dict keyed by sheet position),
# using the first column as the index, and stack them into one table.
sheets = pd.read_excel(root / "Sample_Info.xlsx", sheet_name=[0, 1, 2], index_col=0)
feature_data = pd.concat(list(sheets.values()), axis=0)

# Keep only the rows whose "SAXS" column is "Y".
has_saxs = feature_data["SAXS"] == "Y"
feature_data = feature_data[has_saxs]

In [None]:
# Get only the features
indexes = [int(xx) for xx in feature_data.index.to_list()]
feature_data = feature_data[["V_CA", "V_CN", "V_HA"]].to_numpy()

In [None]:
# Directory holding the per-sample prediction files (rebinds `root`).
root = Path.home() / Path("Data/22_nanoparticle/2022_04_14/2021_12_21B_astest_preds")
# Load every "*_pred.dat" file; the dict key is the integer found in the
# second-to-last underscore-separated token of the file name.
data = {int(xx.name.split("_")[-2]): np.loadtxt(xx) for xx in root.glob("*_pred.dat")}

In [None]:
# Pull four scalars out of each sample's loaded array, in the order given by
# `indexes`: element [0, 0] and [0, 1] feed the R target and its alpha,
# element [1, 0] and [1, 1] feed the log10s target and its alpha.
target_data_R = np.array([data[ii][0, 0] for ii in indexes])
target_data_R_alpha = np.array([data[ii][0, 1] for ii in indexes])
target_data_log10s = np.array([data[ii][1, 0] for ii in indexes])
target_data_log10s_alpha = np.array([data[ii][1, 1] for ii in indexes])

In [None]:
# Campaign on the measured data using only the R target and its alpha. One
# bounds pair per feature column, in the order V_CA, V_CN, V_HA.
# NOTE(review): unlike the 2D campaign, the target array is passed here as 1D
# (no `[:, None]`) — confirm Campaign accepts both.
campaign = Campaign(
    feature_data, target_data_R, target_data_R_alpha,
    bounds=[(0, 60), (0, 240), (20, 120)], policy=ExpectedImprovementPolicy(), target=70.0, randomstate=124
)

In [None]:
# Run the campaign with n=10 and keep whatever run() returns.
performance = campaign.run(n=10)

In [None]:
# Evaluate the campaign's truth callable at the campaign's own X.
r = campaign.truth(campaign.X)

In [None]:
# Display the values computed in the previous cell.
r