# Training a Calibration Layer
In this notebook we analyse whether an additional calibration layer can be used to improve the performance of a model on a dataset where an offset has been applied to simulate a miscalibrated sensor. We do the study in **PV units**, adding the calibration layer BEFORE the PV -> sim transformation.

In [None]:
from utils import load_lcls
import torch
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from lume_model.torch import LUMEModule
import json
from copy import deepcopy
from torch.nn import MSELoss
from botorch.models.transforms.input import InputTransform, Normalize

In [None]:
# Load the PV metadata used throughout the notebook.
# The ``with`` block closes the file on exit, so no explicit close() is needed;
# the encoding is pinned so the result does not depend on the platform default.
with open('configs/pv_info.json', 'r', encoding='utf-8') as f:
    pv_info = json.load(f)

# Bare expression: shown as the cell output in the notebook.
pv_info

In [None]:
# Load the model from its variable specification and normalization config.
nn_model = load_lcls('configs/lcls_variables.yml', 'configs/normalization.json')

# Take independent copies of the first output / input transformer before the
# model's transformer lists are modified below.
output_transformer, input_transformer = (
    deepcopy(nn_model.output_transformers[0]),
    deepcopy(nn_model._input_transformers[0]),
)

# The output transformation is removed from the model so that outcomes can be
# compared with each other using MSE.
nn_model._output_transformers = []

In [None]:
# A feature is treated as constant when the lower and upper end of its value
# range coincide.
constants = []
for feature_name, var in nn_model.input_variables.items():
    lower, upper = var.value_range[0], var.value_range[1]
    if lower == upper:
        constants.append(feature_name)
print(constants)

In [None]:
def _load_float64_tensor(path):
    """Load a ``.npy`` file and return its content as a float64 torch tensor."""
    # NOTE: allow_pickle=True can execute arbitrary code while loading, so this
    # must only be pointed at trusted files.
    return torch.from_numpy(np.load(path, allow_pickle=True).astype('float64'))


x_test = _load_float64_tensor('data/x_raw_small.npy')
# The targets are passed through the output transformer right after loading.
y_test = output_transformer(_load_float64_tensor('data/y_raw_small.npy'))
print(x_test.shape)
print(y_test.shape)

In [None]:
# Per-feature conversion factors, ordered like ``nn_model.features``: each sim
# feature name is mapped to its PV name, which is then used to look up the factor.
sim_name_to_pv_name = pv_info['sim_name_to_pv_name']
pv_to_sim_factor = pv_info['pv_to_sim_factor']
conversions = torch.tensor(
    [pv_to_sim_factor[sim_name_to_pv_name[name]] for name in nn_model.features]
)

class PVtoSimFactor(InputTransform, torch.nn.Module):
    """Input transform that rescales every feature by a fixed conversion factor.

    ``transform`` multiplies the input by the factor and ``untransform``
    divides by it, so the two are inverses wherever the factor is non-zero.
    """

    def __init__(self, conversion: torch.Tensor) -> None:
        """Store the conversion factors and the transform-mode flags.

        Args:
            conversion: Factors the input is multiplied by in ``transform``.
        """
        super().__init__()
        # Mode flags; presumably read by the InputTransform base class to decide
        # when the transform is applied (verify against the botorch version in use).
        self.transform_on_fantasize = False
        self.transform_on_eval = True
        self.transform_on_train = True
        self._conversion = conversion

    def transform(self, x):
        """Return ``x`` multiplied by the conversion factors."""
        factor = self._conversion
        return x * factor

    def untransform(self, x):
        """Return ``x`` divided by the conversion factors."""
        factor = self._conversion
        return x / factor
    
pv_to_sim = PVtoSimFactor(conversions)

# Round trip: undo the conversion on the test inputs to get PV units, then
# convert back again.
x_test_pv = pv_to_sim.untransform(x_test)
x_test_transformed = pv_to_sim.transform(x_test_pv)

# verify that the transformations work as expected: the first and the last
# tensor printed should match
for tensor in (x_test, x_test_pv, x_test_transformed):
    print(tensor)


In [None]:
# we add the transformation from PV to sim into the input transformations of the model
# and use it to determine the error on the true dataset and the miscalibrated dataset.
# Guard against re-running this cell: inserting the same transform a second
# time would apply the PV -> sim conversion twice. (The check is by identity, so
# it only protects as long as ``pv_to_sim`` itself has not been re-created.)
if not any(transformer is pv_to_sim for transformer in nn_model._input_transformers):
    nn_model._input_transformers.insert(0, pv_to_sim)
print(nn_model.input_transformers)

We want to apply a given offset to each of the inputs, which we take to be some percentage of the mean measured value, and then transform the original dataset using these offsets.

In [None]:
class MiscalibratedInput():
    """Simulate a miscalibrated sensor by scaling and offsetting each feature.

    The last dimension of the data indexes the features. The offset of a
    feature is a fraction of that feature's mean value in the reference data;
    the scale is a plain multiplicative factor.

    Args:
        x: Reference data the per-feature offsets are derived from.
        scales: ``None`` (scale of 1 for every feature), a single number used
            for every feature, or a tensor with one entry per feature.
        offset_percentage: ``None`` (defaults to 0.1), a single number used for
            every feature, or a tensor with one entry per feature. Values are
            fractions of the feature mean, i.e. 0.1 means 10 %.

    Raises:
        ValueError: If a tensor argument does not have one entry per feature.
    """

    def __init__(self, x, scales=None, offset_percentage=None) -> None:
        self.offsets = self.calculate_offsets(x, offset_percentage)
        self.scales = self.calculate_scales(x, scales)

    @staticmethod
    def _per_feature(value, n_features, default, what):
        """Expand ``value`` to one entry per feature, validating tensor lengths."""
        if value is None:
            value = default
        if isinstance(value, (int, float)):
            # the size must be a tuple; float() keeps the result floating point
            # even when an integer is passed
            return torch.full((n_features,), float(value))
        if value.shape[-1] != n_features:
            raise ValueError(
                f"length of passed {what} should equal the number of features "
                f"in the dataset. Received: {value.shape[-1]}, Expected: {n_features}"
            )
        return value

    def calculate_offsets(self, x, offset_percentage):
        """Return the per-feature offsets as ``mean(feature) * fraction``.

        Features without any spread (standard deviation of exactly 0) use a
        nominal mean of 1e-6, so their offset is negligible.
        """
        n_features = x.shape[-1]
        # if no value is passed, we assume an offset of 0.1, i.e. 10 % of the mean
        offset_percentages = self._per_feature(
            offset_percentage, n_features, 0.1, "offsets"
        )

        # the means are taken from the data passed in, not from a notebook global
        mean_vals = torch.tensor([
            1e-6 if x[..., i].std() == 0 else x[..., i].mean().item()
            for i in range(n_features)
        ])

        return mean_vals * offset_percentages

    def calculate_scales(self, x, scale):
        """Return the per-feature multiplicative scales."""
        return self._per_feature(scale, x.shape[-1], 1.0, "scales")

    def get_data(self, x):
        """Return ``x`` with the scales and offsets applied."""
        return x * self.scales + self.offsets
    
# Scale applied to constant and to non-constant features. Both are currently
# 1.0, i.e. no scale miscalibration is simulated; they are kept separate so
# that either can be changed independently.
constant_feature_scale = 1.0
variable_feature_scale = 1.0
feature_scales = torch.tensor([
    constant_feature_scale if name in constants else variable_feature_scale
    for name in nn_model.features
])

# Every feature gets an offset of 0.1 times its mean value.
mis_cal_input = MiscalibratedInput(
    x_test_pv,
    scales=feature_scales,
    offset_percentage=torch.full((len(nn_model.features),), 0.1)
)
x_test_pv_offset = mis_cal_input.get_data(x_test_pv)

# original data, miscalibrated data, then the applied offsets and scales
for item in (x_test_pv, x_test_pv_offset, mis_cal_input.offsets, mis_cal_input.scales):
    print(item)

In [None]:
# Bare expression: shows the model's current input transformers as the cell output.
nn_model.input_transformers

In [None]:
# Wrap the model so that it can be called directly on tensors.
base_model = LUMEModule(nn_model, nn_model.features, nn_model.outputs)

# Predictions for the clean and for the miscalibrated inputs. The first two
# axes are swapped, presumably so that rows index samples like in ``y_test``.
no_offset_results = base_model(x_test_pv).detach().transpose(1, 0)
offset_results = base_model(x_test_pv_offset).detach().transpose(1, 0)

# Order the samples by the first column of the targets so the curves are
# easy to compare.
sort_idx = torch.argsort(y_test[:, 0])

fig, ax = plt.subplots()
for series, label in (
    (y_test, 'y true'),
    (no_offset_results, 'y no offset'),
    (offset_results, 'y offset'),
):
    ax.plot(series[:, 0][sort_idx], '.', label=label)

ax.legend()

plt.show()


The large error in the miscalibrated model is likely due to the constant values being offset. 

Using this offset dataset, we want to add an additional calibration layer prior to the PV->SIM conversion that will allow us to determine what value will bring the offset dataset back to the known domain. 

In order to train this calibration layer, the weights of the offset need to be normalised to a similar range for each feature to prevent some features from dominating over others during training, which helps to stabilise the training process. 

We train this normalisation layer using the known conversion dataset that we originally have. 

In [None]:
# Normalization over all model features, fitted on the uncorrupted PV-unit data.
pv_normalization = Normalize(len(nn_model.features))
# The order matters: the transform is put into training mode, called once on
# the data (in training mode botorch's Normalize is expected to learn its
# bounds from the input -- confirm for the installed version), and then
# switched to eval mode for all later use.
pv_normalization.train()
pv_normalization(x_test_pv)
pv_normalization.eval()

In [None]:
class CalibrationLayer(torch.nn.Module):
    """Learnable per-feature affine correction applied in a transformed space.

    The input is passed through ``input_transformer``, multiplied by ``scales``
    and shifted by ``offsets`` there, and then mapped back with the
    transformer's ``untransform``.

    Args:
        dim: Number of features, i.e. the length of ``offsets`` and ``scales``.
        input_transformer: Object providing ``eval()``, ``__call__`` and
            ``untransform``.
    """

    def __init__(self, dim, input_transformer) -> None:
        super().__init__()
        self._input_transformer = input_transformer
        # start close to the identity: tiny offsets and unit scales
        initial_offsets = torch.full((dim,), 1e-6)
        initial_scales = torch.ones(dim)
        self.offsets = torch.nn.Parameter(initial_offsets, requires_grad=True)
        # for now we assume that the scales are set, we only have offsets
        self.scales = torch.nn.Parameter(initial_scales, requires_grad=True)

    def forward(self, x):
        """Return ``x`` corrected by the current scales and offsets."""
        transformer = self._input_transformer
        # the transformer is always used in eval mode here
        transformer.eval()
        normalized = transformer(x)
        corrected = normalized * self.scales + self.offsets
        return transformer.untransform(corrected)

cal_layer = CalibrationLayer(
    dim=len(nn_model.features),
    input_transformer=pv_normalization,
)
# check it can be called, for now it should return the exact same values
# (output first, then the input for comparison)
for tensor in (cal_layer(x_test_pv_offset), x_test_pv_offset):
    print(tensor)

Now we add this layer into the input transformations for our LUMEModel and run it through a training loop using the offset dataset and the true output values.

NOTE: what do we use as y here — the values obtained from `x_test_pv` or the `y_true` values? My gut tells me to use the `x_test_pv` values, as we want the calibration layer to learn a transformation that brings the values back to what the original model was predicting, not to absorb some unknown uncertainty in the model.

In [None]:
# Training inputs: an independent copy of the miscalibrated PV-unit data.
x = deepcopy(x_test_pv_offset)

# Training targets: what the base model predicts for the clean data, detached
# from the graph, copied, and with the first two axes swapped.
reference_prediction = base_model(x_test_pv).detach()
y_train = torch.transpose(deepcopy(reference_prediction), 1, 0)

In [None]:
class LUMEModuleT(LUMEModule):
    """``LUMEModule`` whose forward pass returns its result with axes 0 and 1 swapped."""

    def __init__(self, model, feature_order, output_order) -> None:
        super().__init__(model, feature_order, output_order)

    def forward(self, x):
        """Run the parent forward pass and swap the first two axes of its output."""
        return super().forward(x).transpose(1, 0)

In [None]:
# Fresh calibration layer, placed in front of all other input transformations
# of a copy of the model (the original ``nn_model`` stays untouched).
cal_layer = CalibrationLayer(len(nn_model.features), pv_normalization)
calibrated_nn = deepcopy(nn_model)
calibrated_nn._input_transformers.insert(0, cal_layer)

calibrated_model = LUMEModuleT(calibrated_nn, calibrated_nn.features, calibrated_nn.outputs)
# Register the offsets on the wrapper so that ``calibrated_model.parameters()``
# (and therefore the optimizer) includes them. The scales are left out on purpose.
calibrated_model.register_parameter('offsets', cal_layer.offsets)
# calibrated_model.register_parameter('scales', cal_layer.scales)

# now we define a training loop that trains the offsets
loss_fn = torch.nn.MSELoss()  # mean square error
optimizer = torch.optim.Adam(calibrated_model.parameters(), lr=0.1)
# halve the learning rate every 100 epochs (``verbose`` is not passed: False is
# the default and the argument is deprecated in recent PyTorch releases)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.5)


n_epochs = 500   # number of epochs to run

# Hold the best model
best_mse = torch.inf   # init to infinity
best_weights = None
val_history = []

for epoch in range(n_epochs):
    calibrated_model.train()
    # forward pass: fit the predictions of the base model on the clean data
    y_pred = calibrated_model(x)
    loss = loss_fn(y_pred, y_train)
    # backward pass
    optimizer.zero_grad()
    loss.backward()
    # update weights
    optimizer.step()
    scheduler.step()
    # evaluate accuracy at end of each epoch; no gradients are needed here, so
    # the autograd graph is not built. Note that the evaluation compares
    # against ``y_test`` while the training target is ``y_train``.
    calibrated_model.eval()
    with torch.no_grad():
        y_pred = calibrated_model(x)
        mse = loss_fn(y_pred, y_test).item()
    val_history.append(mse)
    if mse < best_mse:
        best_mse = mse
        best_weights = deepcopy(calibrated_model.state_dict())

# restore the best weights seen during training; ``best_weights`` stays None
# when no epoch produced a finite MSE (e.g. the loss was NaN throughout)
if best_weights is not None:
    calibrated_model.load_state_dict(best_weights)
else:
    print('warning: no finite validation MSE was observed, weights were not restored')

# validation history, skipping the first 10 % of the epochs
fig, ax = plt.subplots()
ax.plot(val_history[int(0.1*n_epochs):])
# ax.legend()
plt.show()

One thing to note is that as soon as any offset is introduced into the constant values, the error skyrockets and training completely breaks down.

In [None]:
# Learned calibration parameters: offsets first, then scales.
for parameter in (cal_layer.offsets, cal_layer.scales):
    print(parameter)

In [None]:
cal_layer.eval()
# Calibrated inputs with the learned scale divided out, detached from the graph.
calibrated_inputs = (cal_layer(x_test_pv_offset) / cal_layer.scales).detach()
# What the layer removed from the miscalibrated data, to be compared with the
# offsets that were applied.
learned_real_offsets = x_test_pv_offset - calibrated_inputs
print(learned_real_offsets[0])
print(mis_cal_input.offsets)

In [None]:
# Compare applied and learned calibration per feature: offsets in the top
# panel, scales in the bottom panel.
fig, ax = plt.subplots(2,1,sharex='all',figsize=(15,10))

# derive the bar positions from the model rather than hard-coding the feature count
feature_positions = range(len(nn_model.features))

ax[0].bar(feature_positions, mis_cal_input.offsets, label='true offset',alpha=0.5)
# the first row of the learned offsets is used as the representative sample
ax[0].bar(feature_positions, learned_real_offsets[0], label='learned offset',alpha=0.5)
ax[0].legend()

# the bottom panel shows scales, so it is labelled accordingly
ax[1].bar(feature_positions, mis_cal_input.scales, label='true scale',alpha=0.5)
ax[1].bar(feature_positions, cal_layer.scales.detach(), label='learned scale',alpha=0.5)
ax[1].legend()

plt.xticks(feature_positions, nn_model.features,rotation=90)
fig.tight_layout()
plt.show()

In [None]:
# now compare what the calibrated results look like versus the original model results
fig, ax = plt.subplots(figsize=(12,8))

# base model on clean and on miscalibrated inputs (first two axes swapped)
no_offset_results = base_model(x_test_pv).detach().transpose(1, 0)
offset_results_no_calibration = base_model(x_test_pv_offset).detach().transpose(1, 0)
# the calibrated model is used as returned, without a further transpose
offset_results_with_calibration = calibrated_model(x_test_pv_offset).detach()

# (values, line format, legend label) for every curve, in drawing order
curves = (
    (no_offset_results, '.', 'base no offset'),
    (offset_results_no_calibration, '.', 'offset no calibration'),
    (offset_results_with_calibration, '.', 'offset with calibration'),
    (y_test, 'k--', 'GT'),
)
for values, fmt, label in curves:
    ax.plot(values[:, 0][sort_idx], fmt, label=label)

ax.legend()
plt.show()