In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import torch
import xarray as xr

from lib.advection import vertical_advection
from lib.models.torch import interface

In [None]:
def _compute_residual(x, f):
    dt = x.time[1]-x.time[0]
    return (f-f.shift(time=-1))/dt - f


def compute_residuals(inputs, forcings):
    """Compute Q1 and Q2.

    Applies ``_compute_residual`` to the ``qt`` and ``sl`` variables of
    ``inputs`` and ``forcings`` and collects the results in a new
    ``xr.Dataset`` keyed by the same variable names.
    """
    residuals = {}
    for key in ('qt', 'sl'):
        residuals[key] = _compute_residual(inputs[key], forcings[key])
    return xr.Dataset(residuals)




def compare_srcs(src, resid, **kw):
    """Compare the x and time averaged source terms.

    Draws a 2x2 grid of panels: the top row shows ``sl`` (titled Q1) and
    the bottom row ``qt`` (titled Q2); the left column is taken from
    ``src`` (titled "NN ...") and the right column from ``resid``.

    Extra keyword arguments are forwarded to each ``.plot`` call, except
    that ``x`` and ``y`` are always forced to ``"y"`` and ``"z"``.
    Relies on ``plt`` (matplotlib.pyplot) being available in the notebook
    namespace.
    """
    kw.update(dict(x="y", y="z"))
    fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)

    # Panels listed in row-major order, matching the order of ``axs.flat``.
    panels = [
        (src.sl, "NN Q1"),
        (resid.sl, "Q1"),
        (src.qt, "NN Q2"),
        (resid.qt, "Q2"),
    ]
    for ax, (field, title) in zip(axs.flat, panels):
        field.mean(['x', 'time']).plot(ax=ax, **kw)
        ax.set_title(title)

In [None]:
# Read the processed model inputs and the forcings from disk.
inputs, forcings = map(
    xr.open_dataset,
    ("../data/processed/inputs.nc", "../data/processed/forcings.nc"),
)

# Residual source terms diagnosed from the data; these are what the
# network's predictions get compared against.
resid = compute_residuals(inputs, forcings)

In [None]:
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints that come from a trusted source.
model = torch.load("../data/output/model.test/1.torch")
# Source terms produced by the network from the raw inputs and forcings.
src = interface.rhs(model, inputs, forcings)

In [None]:
# Network source terms (left column) against the diagnosed residuals (right column).
compare_srcs(src, resid)

There are some very big differences between the prediction and the observed residuals. I suspect this is because I performed this verification without first adding the forcing term. The actual time stepping used by the neural network looks like
$$ x^* = x^n + \frac{h}{2}(g^{n+1} + g^n),\quad x^{n+1} = x^* + h f(x^*) $$
where $g^n$ is the advection forcing at time step $n$. There are probably very large splitting errors because I am using $h=3$ hours, which means that the statistics of $x^*$ might be very different from the statistics of $x^n$. This could have a large impact on the output of the neural network. To test this, let's first apply the advection terms to the input data.

In [None]:
def trapezoid_step(x, g, h=.125):
    """Apply the trapezoid-rule forcing step of the split time stepper.

    Computes ``x* = x^n + h/2 * (g^{n+1} + g^n)``.

    Parameters
    ----------
    x : array with a ``time`` dimension
        State at each time step ``n``.
    g : array aligned with ``x``
        Forcing at each time step; must provide ``shift(time=...)``.
    h : float, optional
        Step size. The default 0.125 presumably corresponds to the 3 hour
        step with time measured in days -- TODO confirm against the units
        of the ``time`` coordinate.

    Returns
    -------
    Array like ``x``. The final time step has no ``g^{n+1}``, so it holds
    the fill value produced by ``shift`` (NaN for floating-point data).
    """
    # shift(time=-1) places g^{n+1} in slot n.  The previous shift(time=1)
    # paired g^n with g^{n-1}, i.e. it averaged over the interval *before*
    # x^n rather than the one the step advances across.
    return x + h * (g + g.shift(time=-1))/2


# Advance only the prognostic variables; every other variable passes
# through unchanged.
xst = inputs.apply(lambda x: trapezoid_step(x, forcings[x.name]) if x.name in ['sl', 'qt'] else x)
# Forcing averaged over the same interval [n, n+1] used by trapezoid_step.
gavg = (forcings + forcings.shift(time=-1))/2

Now let's compare the mean of $x^*$ and $x^n$.

In [None]:
# Change in the x/time mean of every variable between x* (xst) and x^n (inputs).
bias = xst.mean(['x', 'time']) - inputs.mean(['x', 'time'])

In [None]:
fig, (aq, at) = plt.subplots(1, 2, figsize=(8, 3), sharey=True)

# One panel per variable: qt on the left axes, sl on the right axes.
bias.qt.plot(ax=aq)
bias.sl.plot(ax=at)

# Title each panel on its own axes.  Both titles used to be set on ``aq``,
# which mislabelled the qt panel as "SL diff (K)" and left the sl panel
# without a title.
aq.set_title("QT diff (g/kg)")
at.set_title("SL diff (K)");

These differences are actually pretty small, so I would be surprised if they are causing the problem, but let's see what the neural network predicts for the source terms when it is given $x^*$.

In [None]:
# Re-evaluate the network on x* (xst) with the time-averaged forcing.
# This rebinds ``src``, replacing the prediction made from the raw inputs.
src = interface.rhs(model, xst, gavg)

In [None]:
# Same comparison as before, now with the prediction made from x*.
compare_srcs(src, resid)

This made a big difference! It turns out the splitting error actually matters a lot in this scenario.