We saw in this [journal entry](http://wiki.noahbrenowitz.com/doku.php?id=journal:2018-10:day-2018-10-24#run_110) that the multiple-step trained neural network gives a very imbalanced estimate, but the two-step trained neural network gives a good answer. Where do these two patterns disagree?

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import xarray as xr
import click
import torch
from uwnet.model import call_with_xr
import holoviews as hv
from holoviews.operation import decimate
hv.extension('bokeh')


def column_integrate(data_array, mass):
    """Return the ``mass``-weighted sum of ``data_array`` over ``'z'``.

    ``data_array`` is multiplied by ``mass`` and the product is summed along
    the ``'z'`` dimension.
    """
    weighted = data_array * mass
    return weighted.sum('z')


def compute_apparent_sources(model_path, ds):
    """Evaluate the model stored at ``model_path`` on the dataset ``ds``.

    The model is deserialized with ``torch.load`` and passed, together with
    ``ds``, to ``call_with_xr`` using ``drop_times=0``. Whatever that call
    returns is returned unchanged.
    """
    # NOTE(review): torch.load unpickles the file, so only point this at
    # model files from a trusted source.
    return call_with_xr(torch.load(model_path), ds, drop_times=0)

def get_single_location(ds, location=(32,0)):
    """Select the single grid point ``location = (y, x)`` from ``ds``.

    Length-one slices are used instead of integer indices, so the ``y`` and
    ``x`` dimensions are kept (with size one) in the result of ``ds.isel``.
    """
    row, col = location
    indexers = {
        'y': slice(row, row + 1),
        'x': slice(col, col + 1),
    }
    return ds.isel(**indexers)

def dict_to_dataset(datasets, dim='key'):
    """Concatenate a dict of datasets along a new axis.

    Parameters
    ----------
    datasets : dict
        Mapping from label to the xarray object stored under that label.
    dim : str
        Name of the new dimension; its coordinate holds the dict keys.

    Returns
    -------
    The result of ``xr.concat`` over the dict values, in dict order.

    Raises
    ------
    ValueError
        If ``datasets`` is empty.
    """
    # pandas is not imported at the top of this file; without this import the
    # pd.Index call below fails with NameError.
    import pandas as pd

    if not datasets:
        raise ValueError("datasets must contain at least one entry")

    idx = pd.Index(list(datasets), name=dim)
    return xr.concat(list(datasets.values()), dim=idx)

def dataarray_to_table(dataarray):
    """Flatten a data array with a ``'key'`` dimension into a table.

    The array is split along ``'key'`` with ``to_dataset('key')``, converted
    with ``to_dataframe()``, and the index is then moved into ordinary
    columns by ``reset_index()``.
    """
    per_key = dataarray.to_dataset('key')
    frame = per_key.to_dataframe()
    return frame.reset_index()

def get_apparent_sources(model_paths, data_path, location=(32, 0)):
    """Evaluate several saved models on one grid point of a dataset.

    Parameters
    ----------
    model_paths : dict
        Mapping from a label (e.g. the training strategy) to the path of the
        saved model to evaluate.
    data_path : str
        Path of the dataset, opened with ``xr.open_dataset``.
    location : tuple of int
        ``(y, x)`` index of the grid point to evaluate, forwarded to
        ``get_single_location``. Defaults to ``(32, 0)``, the point that was
        previously hard-coded here.

    Returns
    -------
    The per-model outputs of ``compute_apparent_sources`` combined by
    ``dict_to_dataset``, i.e. concatenated along a new dimension labelled
    with the keys of ``model_paths``.
    """
    ds = xr.open_dataset(data_path)
    # Restrict to a single grid point before running the models.
    column = get_single_location(ds, location=location)
    sources = {
        training_strategy: compute_apparent_sources(model_path, column)
        for training_strategy, model_path in model_paths.items()
    }
    return dict_to_dataset(sources)

In [None]:
# Saved models to compare, keyed by the label each one gets in the plots.
model_paths = dict(
    multi='../models/113/3.pkl',
    single='../models/110/3.pkl',
)

# Dataset the models are evaluated on.
data_path = '../data/processed/training.nc'

# One result per model, combined by get_apparent_sources.
sources = get_apparent_sources(model_paths, data_path)

# Apparent moistening and heating

Here we scatter-plot the apparent moistening and heating predicted by the two models against each other:

In [None]:
%%opts Scatter[width=500, height=500, color_index='z'](cmap='viridis', alpha=.2)
%%opts Curve(color='black')

lims = (-30, 40)
df = dataarray_to_table(sources.QT)
moisture_source = hv.Scatter(df, kdims=["multi", "single"]).groupby('z').redim.range(multi=lims, single=lims) \
        *hv.Curve((lims, lims))

lims = (-30, 40)
df = dataarray_to_table(sources.SLI)
heating = hv.Scatter(df, kdims=["multi", "single"]).groupby('z').redim.range(multi=lims, single=lims) \
            *hv.Curve((lims, lims))


moisture_source.relabel("Moistening (g/kg/day)") + heating.relabel("Heating (K/day)")

The multistep moistening is far too negative in the upper parts of the atmosphere, and the corresponding heating is too positive. Does this **happen because the moisture is negative in those regions**?