In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
from xnoah import swap_coord

from lib.plots import plot_soln

In the following cell I load the truth, SCAM and NN single column datasets.

In [None]:
# Truth: the processed inputs, with the precipitation field `Prec` taken from
# the forcings file and attached under the name `prec`.
truth = xr.open_dataset("../data/processed/inputs.nc")
force = xr.open_dataset("../data/processed/forcings.nc")
# swap_coord is a project helper; presumably it makes `p` the vertical
# coordinate in place of `z` -- confirm against xnoah.
truth = swap_coord(truth.assign(prec=force.Prec), {'z': 'p'})

# SCAM
# cam = xr.open_dataset("../data/processed/iop/0-8/cam.nc").squeeze()\
#         .sel(time=truth.time[:-10])

def convert_time_to_days(time, t0=float(truth.time[0])):
    """Re-express a time axis as fractional days offset by ``t0``.

    Parameters
    ----------
    time : array
        Time stamps whose differences can be cast to ``timedelta64[s]``
        (the caller passes ``cam.time.values``).
    t0 : float
        Value assigned to the first time stamp. The default is the first
        entry of ``truth.time``, evaluated once when this function is defined.

    Returns
    -------
    array of float
        ``t0`` plus the elapsed time since ``time[0]``, in days.
    """
    elapsed = time - time[0]
    # Cast to whole seconds first, then to float, before scaling to days.
    elapsed_seconds = elapsed.astype('timedelta64[s]').astype(float)
    return t0 + elapsed_seconds / 86400

# Single-column CAM output: drop length-1 dimensions, put time on the same
# day-based axis as `truth`, then add variables named like the truth fields.
# NOTE(review): the factors below presumably convert m/s -> mm/day (prec) and
# kg/kg -> g/kg (qt), and 9.81/1004 is presumably g/cp -- confirm the units.
cam = (
    xr.open_dataset("../data/processed/iop/0-8/cam.nc")
    .squeeze()
    .pipe(lambda ds: ds.assign_coords(time=convert_time_to_days(ds.time.values)))
    .pipe(lambda ds: ds.assign(
        p=ds.lev,
        prec=(ds.PRECL + ds.PRECC)*1000*86400,
        qt=ds.Q*1000,
        sl=ds['T'] + ds.Z3 * 9.81/1004,
    ))
)

# Neural network scheme: attach the pressure levels of `truth`, switch the
# vertical coordinate via swap_coord, and rename `Prec` to `prec` so the
# variable names match `truth`.
nn_cols = xr.open_dataset("../data/output/columns.nc")
nn_cols = nn_cols.assign(p=truth.p)
nn_cols = swap_coord(nn_cols, {'z': 'p'})
nn_cols = nn_cols.rename({'Prec': 'prec'})

Some routines that I will need to interpolate CAM's data.

In [None]:
from scipy.interpolate import interp1d

def interp_np(x, pold, pnew, axis=-1):
    """Interpolate array ``x`` from grid ``pold`` to grid ``pnew`` along ``axis``.

    Uses ``scipy.interpolate.interp1d`` with its default interpolation kind.
    Bounds errors are disabled, so points of ``pnew`` outside the range of
    ``pold`` do not raise (they are filled with interp1d's default fill value).
    """
    interpolant = interp1d(pold, x, axis=axis, bounds_error=False)
    return interpolant(pnew)

def pressure_interp(x, pnew):
    """Interpolate a DataArray onto new values of its ``p`` dimension.

    Parameters
    ----------
    x : xr.DataArray
        Array with a dimension named ``'p'`` and a matching ``x.p`` coordinate.
    pnew : array-like
        Target values along ``p``.

    Returns
    -------
    xr.DataArray
        Array with the same dimension names and other coordinates as ``x``,
        with the ``p`` coordinate replaced by ``pnew``.
    """
    # Look the axis up on `x` itself. The previous version read it from the
    # global `cam.sl`, which is only right when `x` happens to share that
    # array's dimension order and ties this helper to the `cam` dataset.
    p_axis = x.get_axis_num('p')
    val = interp_np(x.values, x.p, pnew, axis=p_axis)

    # Keep every coordinate of the input, overriding only `p`.
    coords = dict(x.coords)
    coords['p'] = pnew
    return xr.DataArray(val, coords=coords, dims=x.dims)

def pressure_interp_ds(ds, pnew):
    """Apply ``pressure_interp`` to every variable of ``ds`` that has a ``p`` dimension.

    Variables without a ``p`` dimension are passed through unchanged.
    """
    def _regrid(var):
        return pressure_interp(var, pnew) if 'p' in var.dims else var

    return ds.apply(_regrid)

# Column plots

In this section I compare the observed time series for a given spatial location near the equator to the time series generated by forcing the neural network (NN) parametrization and the single column version of CAM.

I use the three-dimensional advective tendency and the surface fluxes to force both NN and CAM. I also tried to match the diurnal cycle between the runs, but I am not sure I did this perfectly yet.

## SAM (Truth)

In [None]:
# Truth column at grid indices x=0, y=8 (the near-equator location discussed in the text).
plot_soln(truth.isel(x=0,y=8))

## NN

In [None]:
# Neural-network column at the same grid indices (x=0, y=8) as the truth plot.
plot_soln(nn_cols.isel(x=0, y=8))

## CAM

In [None]:
# `cam` was loaded from the iop/0-8 run and squeezed, so no x/y selection is applied here.
plot_soln(cam)

Just comparing these by eye we can see the neural network scheme does a dramatically better job than CAM. In particular, CAM predicts far too little precipitation.

The performance on the temperature field is worse. For short times of around 1-2 days, the scheme does a good job, but there is a substantial warm bias in the equilibrium of the neural network scheme and SCAM.

However, for all cases SCAM does substantially worse than the NN scheme.

# Biases in mean state (pres vs lat)

In this section I look at the bias between the means of the neural network scheme and the time series from SAM. Running SCAM is quite expensive, so I have only run SCAM for the equatorial points.

In [None]:
# Means over x and time for the truth and the NN run, and their difference.
truth_mean = truth.mean(['x', 'time'])
nn_mean = nn_cols.mean(['x', 'time'])
bias = nn_mean - truth_mean

# Re-centre y on its median and divide by 1000 (the plots label this axis
# 'y (km)'), then install the new coordinate on all three datasets.
y = (bias.y - np.median(bias.y)) / 1000
for _ds in (bias, nn_mean, truth_mean):
    _ds['y'] = y

In [None]:
def plot_pres_vs_lat(bias, ax, levels=np.arange(-5, 6)*.5, title=None):
    """Draw a filled-contour plot of ``bias`` against ``bias.y`` and ``bias.p``.

    Parameters
    ----------
    bias : array with ``y`` and ``p`` coordinates
        Field to contour; ``bias.y`` is used for the horizontal axis and
        ``bias.p`` for the vertical axis.
    ax : matplotlib axes
        Axes to draw on; a colorbar is attached to it.
    levels : sequence of float
        Contour levels (default: -2.5 to 2.5 in steps of 0.5).
    title : str, optional
        Axes title; nothing is set when it is empty or None.
    """
    contours = ax.contourf(bias.y, bias.p, bias, levels,
                           extend='both', cmap='bwr')
    plt.colorbar(contours, ax=ax, pad=.01)
    ax.set_xlabel('y (km)')

    if not title:
        return
    ax.set_title(title)

    

# Side-by-side pressure-vs-y bias panels sharing the vertical axis.
fig, axs = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(7, 3), dpi=100)
ax_humidity, ax_temperature = axs

plot_pres_vs_lat(bias['qt'], ax_humidity, title="Humidity bias (g/kg)")
plot_pres_vs_lat(bias['sl'], ax_temperature, title="Temperature bias (K)")

# The y axis is shared, so inverting it on one panel flips both.
ax_humidity.invert_yaxis()
plt.subplots_adjust(wspace=.02)

Mean precipitation

In [None]:
# Mean precipitation (averaged over x and time) for the truth and NN runs.
plt.figure(dpi=100, figsize=(5, 5/1.61))
truth_mean['prec'].plot(label='Truth')
nn_mean['prec'].plot(label='NN')

# The CAM point is currently disabled:
# camprec = cam.prec.mean(['time', 'x'])
# plt.scatter([0], [camprec], label='CAM')

plt.ylabel('Prec (mm/day)')
plt.legend(loc="upper right")

You can plainly see that SCAM dramatically underpredicts the amount of rain. I am not sure why it is so small.

# Biases in equilibrium state (equator)

In this section I just show vertical profiles of the bias that NN and CAM have compared to the mean on the equator.

In [None]:
def plot_bias(field, ax=None, title="", xlim=None, unit=None):
    """Plot CAM and NN mean profiles of ``field`` minus the truth mean profile.

    The truth and NN profiles are taken at ``y=7`` and averaged over time and
    x; the CAM profile is the time/x mean of ``cam[field]``. Each difference
    is drawn against that profile's own ``p`` values.

    Parameters
    ----------
    field : str
        Variable name present in ``truth``, ``cam`` and ``nn_cols``.
    ax : matplotlib axes, optional
        Axes to draw on; a new small figure is created when omitted.
    title : str
        Axes title.
    xlim : sequence, optional
        x-axis limits, applied only when given.
    unit : str, optional
        x-axis label, applied only when given.
    """
    avg_dims = ['time', 'x']
    obs = truth[field].isel(y=7).mean(avg_dims)
    profiles = {
        'CAM': cam[field].mean(avg_dims),
        'NN': nn_cols[field].isel(y=7).mean(avg_dims),
    }

    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=(2, 3), dpi=100)

    for label, profile in profiles.items():
        ax.plot(profile - obs, profile.p, label=label)

    ax.set_title(title, size=10)
    if xlim:
        ax.set_xlim(xlim)
    if unit:
        ax.set_xlabel(unit)


In [None]:
# Vertical bias profiles for total water (left) and temperature (right).
fig, axs = plt.subplots(1, 2, sharey=True, figsize=(4, 3), dpi=100)
left, right = axs

plot_bias('qt', ax=left, title='Total water bias', unit='g/kg',
          xlim=[-.5, 2.5])
plot_bias('sl', ax=right, title='Temperature bias', unit='K',
          xlim=[-1, 5])

left.invert_yaxis()
left.set_ylabel('p (hPa)')
right.legend()
# Set last: with the shared y axis these explicit limits apply to both panels.
right.set_ylim([1000, 80])

The bias of NN is much better than CAM.

# Error growth (equator)

In this section, I plot the dynamic growth of errors after the beginning of the simulations. I do this to provide a more quantitative perspective on the pressure vs time series above.

I compare the zonally averaged mean absolute deviation (MAD) for four different predictions:

1. MAD between the time series and its time and zonal mean. This measures the magnitude of the fluctuations about the climatology.
2. Persistence forecast. This forecast assumes that humidity and temperature do not change over the course of the simulation. This gives us an estimate of the time-scale over which the fields change naturally.
3. SCAM based prediction
4. NN based prediction

In [None]:
def mean_squared_error(truth, pred, dims=('x',)):
    """Mean of the squared difference ``truth - pred`` over ``dims``.

    Missing values in the difference are replaced by 0.0 before squaring, so
    they contribute zero error but still count towards the mean.
    """
    error = (truth - pred).fillna(0.0)
    return (error ** 2).mean(dims)

def mean_absolute_dev(truth, pred, dims=('x',)):
    """Mean of the absolute difference ``|truth - pred|`` over ``dims``.

    Missing values in the difference are replaced by 0.0 first, so they
    contribute zero deviation but still count towards the mean.
    """
    error = (truth - pred).fillna(0.0)
    return error.apply(np.abs).mean(dims)


prog_vars = ['sl', 'qt']
truthp = truth[prog_vars]

# Reference states: time means of truth and NN, and the initial truth state.
mu = truthp.mean(['time'])
munn = nn_cols[prog_vars].mean(['time'])
truth0 = truthp.isel(time=0)

# Despite the name, these are mean absolute deviations (see mean_absolute_dev),
# averaged over x, of the truth from each prediction.
mses = {
    'NN': mean_absolute_dev(truthp, nn_cols),
    'Mean': mean_absolute_dev(truthp, mu),
    'Persistence': mean_absolute_dev(truthp, truth0),
}

# CAM is first interpolated onto the pressure levels of `truth`.
# NOTE(review): CAM is compared with the truth at y=7 here, while the column
# plots and the humidity error figure use y=8 -- confirm which index is meant.
camp = pressure_interp_ds(cam[prog_vars], truth.p)
cam_mse = mean_absolute_dev(camp, truthp.isel(y=7))


# q_contours = np.arange(2)*11

def plot_mses(mses, axs=None, label='', **kwargs):
    """Contour the 'Mean', 'Persistence', 'NN' and 'CAM' error fields on a 2x2 grid.

    Parameters
    ----------
    mses : mapping
        Must contain the keys 'Mean', 'Persistence', 'NN' and 'CAM'. Each
        value needs ``time`` and ``p`` attributes and a transpose ``.T``.
    axs : 2-D array of matplotlib axes, optional
        Axes to draw on; a 2x2 grid with shared axes is created when omitted.
    label : str
        Label of the shared colorbar.
    **kwargs
        Passed through to ``contourf``.

    Returns
    -------
    (axs, cb)
        The axes array and the colorbar.
    """
    if axs is None:
        fig, axs = plt.subplots(2, 2, sharex=True, sharey=True, dpi=100,
                                figsize=(6, 3.5))

    panel_order = ('Mean', 'Persistence', 'NN', 'CAM')
    for panel, name in zip(axs.flat, panel_order):
        field = mses[name]
        im = panel.contourf(field.time, field.p, field.T, **kwargs)
        # Tag each panel with its key on a white box.
        panel.text(.05, .8, name,
                   transform=panel.transAxes,
                   bbox=dict(color='white'))

    axs[0, 0].invert_yaxis()
    for row in (0, 1):
        axs[row, 0].set_ylabel('p (hPa)')
    for bottom in axs[1, :]:
        bottom.set_xlabel('days')

    plt.subplots_adjust(wspace=.02, hspace=.02)
    # One colorbar for all panels, built from the last contour set drawn.
    cb = plt.colorbar(im, ax=axs, pad=.01)
    cb.set_label(label)

    return axs, cb
    

## Humidity Errors

In [None]:
# Humidity (qt) errors at y=8 for times 100 to 120; CAM is already a single
# column, so only the time window is selected for it.
mses_meridional_average = dict(
    (key, val['qt'].isel(y=8).sel(time=slice(100, 120)))
    for key, val in mses.items()
)
mses_meridional_average['CAM'] = cam_mse['qt'].sel(time=slice(100, 120))

_, cb = plot_mses(mses_meridional_average,
                  label='MAD (g/kg)',
                  levels=np.arange(11)*.25,
                  extend='max')

We can see errors grow much more slowly for NN than for the other predictions. In practice, the NN is able to predict around 5-7 days before it decays to the mean. This is much better than either the persistence forecast or CAM.

## Temperature Errors

In [None]:
# Temperature (sl) errors for times 100 to 120.
# Use y=8, the same latitude index as the humidity error figure and the
# column plots. The previous y=0 put Mean/Persistence/NN at a different
# latitude than the equatorial CAM panel, so the panels were not comparable.
mses_sl_meridional_average = {key: val.sl.isel(y=8).sel(time=slice(100, 120))
                           for key, val in mses.items()}
# CAM is already a single column, so only the time window is selected.
mses_sl_meridional_average['CAM'] = cam_mse.sl.sel(time=slice(100, 120))

plot_mses(mses_sl_meridional_average, levels=.5*np.arange(11), extend='max',
          label='MAD (K)');

The temperature predicted by the NN diverges from the truth much faster than the humidity does. Some large biases in the temperature emerge in the NN and CAM schemes. Moreover, it appears the neural network misses the diurnal cycle of temperature.