# Calibration Test Cases
Here we define the cases of varying drift that we expect to see in the accelerator and that we want to test for.

* normal - the values vary according to some predefined schedule
* sensor accuracy decreases over time, slowly getting noisier (does this happen?)
* calibration of sensors gets worse over time, e.g. a magnet requiring more current to get the same influence on the beam
* the same feature values no longer give the same output values (because of the influence of some external parameter not captured in the features?)
* feature values vary according to some periodic function over a time period
* machine 'mode' is different, meaning live distribution of feature values is not within the training distribution

In each case, we want to apply the test case to the data and observe how the error in the prediction varies over time.

**NOTE** Do we use the model's own prediction as the ground truth?

**

In [None]:
import json
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Load the JSON configuration files. Each file is opened in a `with` block so
# its handle is closed as soon as parsing finishes instead of being left for
# the garbage collector.
# model_info supplies the input/output name lists and the training input
# bounds used throughout this notebook.
with open("configs/model_info.json") as config_file:
    model_info = json.load(config_file)
with open("configs/pv_info.json") as config_file:
    pv_info = json.load(config_file)
with open("configs/normalization.json") as config_file:
    nn_transform_info = json.load(config_file)

In [None]:
from transformers import create_sim_to_nn_transformers
from transformed_model import KeyedTransformedModel
# get transformers for normalization into NN
nn_input_transformer, nn_output_transformer = create_sim_to_nn_transformers(
    "configs/normalization.json"
)
# Training-input bounds from model_info, converted to tensors.
# NOTE(review): test_max gets a leading axis from unsqueeze(0) but test_min
# does not, so the two tensors have different shapes — confirm this asymmetry
# is intended.
test_min = torch.tensor(model_info["train_input_mins"])
test_max = torch.tensor(model_info["train_input_maxs"]).unsqueeze(0)

# Load the serialized network and cast it to double precision.
# NOTE(review): torch.load unpickles the file, so only trusted checkpoints
# should be loaded here.
model = torch.load("torch_model.pt").double()

# define the NN surrogate that contains the NN, the input/output transformers for
# simulation units
# (the last two arguments are the input and output name lists from model_info)
surrogate = KeyedTransformedModel(
    model,
    nn_input_transformer,
    nn_output_transformer,
    model_info["model_in_list"],
    model_info["model_out_list"]
)


In [None]:
# Raw samples: inputs and outputs are stored as separate .npy files.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so
# these files must come from a trusted source.
raw_x_data = np.load("data/x_raw_small.npy", allow_pickle=True)
raw_y_data = np.load("data/y_raw_small.npy", allow_pickle=True).astype('float')

# Labelled views of the same arrays, one column per model input / output name.
x_df = pd.DataFrame(data=raw_x_data, columns=model_info['model_in_list'])
y_df = pd.DataFrame(data=raw_y_data, columns=model_info['model_out_list'])

# Surrogate predictions for every raw input sample, evaluated in double precision.
preds = surrogate(torch.tensor(raw_x_data, dtype=torch.float64))

In [None]:
# Summary statistics of the input columns.
x_df.describe()

In [None]:
# Summary statistics of the output columns.
y_df.describe()

## Normal / Noisy Case
Here, we hold the parameters constant for a given period of time and add noise to the inputs.

In [None]:
# Number of simulated time steps in each synthetic scenario.
n_timesteps = 1000
# Nominal operating point: the mean of the raw inputs along axis 0.
nominals = np.mean(raw_x_data, axis=0)
# Noise-free, drift-free baseline: the nominal values repeated for every time step.
perfect = np.tile(nominals, reps=(n_timesteps, 1))

In [None]:
def plot_features(data, data2=None, max_time=n_timesteps):
    """Plot each input feature over time together with its training range.

    One panel is drawn per entry of ``model_info['model_in_list']``. Dashed
    black horizontal lines mark the matching ``train_input_mins`` and
    ``train_input_maxs`` values so excursions outside the training range are
    visible at a glance.

    Args:
        data: 2-D array indexed as ``[time, feature]``. If its second
            dimension does not equal the number of input features it is
            transposed first.
        data2: Optional second series drawn on the same panels. It receives
            the same orientation handling as ``data``.
        max_time: Right-hand end of the dashed range lines. The default is
            the value ``n_timesteps`` had when this function was defined.
    """
    feature_names = model_info['model_in_list']
    # Derive the feature count from the config instead of hard-coding it.
    n_features = len(feature_names)

    def _time_major(values):
        # Flip (feature, time) input so that columns index features.
        return values if values.shape[1] == n_features else values.T

    data = _time_major(data)
    if data2 is not None:
        data2 = _time_major(data2)

    # Four panels per row; 16 features give the 4x4 grid and 20x10 figure.
    n_cols = 4
    n_rows = max(1, -(-n_features // n_cols))
    fig, ax = plt.subplots(
        n_rows, n_cols, figsize=(20, 2.5 * n_rows), squeeze=False
    )
    ax = ax.ravel()

    bounds = zip(
        feature_names,
        model_info['train_input_mins'],
        model_info['train_input_maxs'],
    )
    for idx, (feature_name, min_val, max_val) in enumerate(bounds):
        panel = ax[idx]
        panel.plot(data[:, idx], label=feature_name)
        panel.hlines(min_val, xmin=0, xmax=max_time, color='k', linestyle='dashed')
        panel.hlines(max_val, xmin=0, xmax=max_time, color='k', linestyle='dashed')
        if data2 is not None:
            panel.plot(data2[:, idx])
        panel.set_title(feature_name)

    # Hide grid cells that have no feature assigned to them.
    for unused in ax[n_features:]:
        unused.set_visible(False)

    fig.tight_layout()
    plt.show()

In [None]:
# Display the 'train_input_maxs' entry of model_info (the values plot_features
# draws as the upper dashed line of each panel).
model_info['train_input_maxs']

In [None]:
# Add zero-mean Gaussian noise to every input. Each feature's noise scale is
# 10% of (its standard deviation in the raw data + 1e-4); the small offset
# keeps the scale strictly positive even when a feature's raw standard
# deviation is zero.
# The sample shape follows `perfect` instead of a hard-coded feature count, so
# it always matches the array the noise is added to.
noise = np.random.normal(0, (raw_x_data.std(axis=0) + 1e-4) * 0.1, size=perfect.shape)
noisy_data = perfect + noise
plot_features(noisy_data, perfect)

In [None]:
def error(perfect, prediction):
    """Return the element-wise squared error between two arrays or tensors.

    Args:
        perfect: Reference values (numpy array or torch tensor).
        prediction: Values to compare against the reference; must be
            compatible with ``perfect`` under subtraction.

    Returns:
        ``(perfect - prediction) ** 2``. A torch result is converted to a
        numpy array; any other result is returned unchanged. No averaging is
        done here — callers take the mean along the axis they need.
    """
    squared_error = (perfect - prediction) ** 2
    if torch.is_tensor(squared_error):
        # detach() drops autograd tracking and cpu() moves the data to host
        # memory; a plain .numpy() raises for tensors that require grad or
        # that live on another device.
        return squared_error.detach().cpu().numpy()
    return squared_error

In [None]:
# Reference output: the surrogate evaluated on the noise-free baseline inputs,
# passed in as a double-precision tensor.
perfect_prediction = surrogate(torch.tensor(perfect, dtype=torch.float64))

In [None]:
def plot_output(data, mse_error, data2=perfect_prediction):
    """Plot predicted against reference outputs, plus the error over time.

    One panel is drawn per entry of ``model_info['model_out_list']`` showing
    the reference curve and the predicted curve; each title carries the mean
    of that output's error column. The last panel of the grid shows the error
    averaged over outputs at each time step.

    Args:
        data: Predicted outputs, indexed as ``[time, output]``.
        mse_error: Per-element errors with the same layout as ``data``.
        data2: Reference outputs drawn as the 'true' curve. The default is
            the value ``perfect_prediction`` had when this function was
            defined.
    """
    output_names = model_info['model_out_list']
    # Two columns, with one extra panel reserved for the error-over-time
    # curve. At least three rows are kept so the layout for up to five
    # outputs is the original 3x2 grid; more outputs add rows instead of
    # colliding with the error panel.
    n_rows = max(3, -(-(len(output_names) + 1) // 2))
    fig, ax = plt.subplots(n_rows, 2, figsize=(20, 10 * n_rows / 3))
    ax = ax.ravel()

    for idx, output_name in enumerate(output_names):
        pred_error = mse_error[:, idx].mean()
        panel = ax[idx]
        panel.plot(data2[:, idx], label='true')
        panel.plot(data[:, idx], label='predicted')
        panel.set_title(f'{output_name}: {pred_error:.6f}')
        # Without a legend the two labelled curves cannot be told apart.
        panel.legend()

    ax[-1].plot(mse_error.mean(axis=1))
    ax[-1].set_title('MSE over time')

    fig.tight_layout()
    plt.show()

In [None]:
# for each 'timestep', use the model to make a prediction and compare it to the ground truth
# first we look at the perfect system: the reference prediction is compared
# with itself, so the plotted error should be zero everywhere (barring NaNs)
errors = error(perfect_prediction, perfect_prediction)
plot_output(perfect_prediction, errors)

In [None]:
# Next, the noisy inputs: run them through the surrogate and measure how far
# the outputs move away from the noise-free reference prediction.
noisy_prediction = surrogate(torch.tensor(noisy_data, dtype=torch.float64))
noisy_error = error(perfect_prediction, noisy_prediction)
plot_output(noisy_prediction, noisy_error)

## Linear Drift

In [None]:
# Build a small linear drift that is later added to the inputs on top of the
# noise. Features whose name ends in 'gradient' move by a fixed fraction
# (shift_scale) of their nominal value at every time step; all other features
# do not drift.
shift_scale = 0.001
shifts = [
    shift_scale * nominals[idx] if name.endswith('gradient') else 0
    for idx, name in enumerate(model_info['model_in_list'])
]

fig, ax = plt.subplots(1,2, figsize=(6,3))

# Left panel: the constant per-step shift of every feature.
drift = np.tile(shifts, (n_timesteps,1))
ax[0].plot(drift)
ax[0].set_title('drift constants')

# Right panel: the running sum of those shifts, i.e. the total offset that has
# built up by each time step.
drift = np.cumsum(drift,axis=0)
ax[1].plot(drift)
ax[1].set_title('cumulative effect of drift')

fig.tight_layout()
plt.show()

In [None]:
# Drifting scenario: the noisy inputs plus the cumulative drift, plotted
# against the noise-free baseline for comparison.
drifting_data = noisy_data + drift
plot_features(drifting_data, perfect)

In [None]:
# Finally, the drifting inputs: run them through the surrogate and compare the
# outputs with the noise-free reference prediction.
drifting_prediction = surrogate(torch.tensor(drifting_data, dtype=torch.float64))
drifting_error = error(perfect_prediction, drifting_prediction)
plot_output(drifting_prediction, drifting_error)