In [None]:
import holoviews as hv

# Load the Bokeh plotting backend for HoloViews; the plotting cells below rely on it.
hv.extension('bokeh')

In [None]:
import xarray as xr
import numpy as np
from sklearn.externals import joblib

from lib.models import get_linear_model, get_mca_mod
from lib.util import mat_to_xarray, output_to_xr, dict_to_xr
from lib.plots.model_evaluation import scatter_plot_z


def resid_from_data(get_model, data):
    """Fit a model on the training split and return its prediction on that split.

    Despite the ``resid`` in the name, the returned value is the model's
    prediction; nothing is subtracted from it here.

    Parameters
    ----------
    get_model : callable
        Called as ``get_model(data)``; must return an object that provides
        ``fit(x, y)`` and ``predict(x)``.
    data : mapping
        Must provide ``data['train']``, a pair ``(x, y)`` in which ``y`` has a
        ``coords`` attribute.

    Returns
    -------
    Whatever ``output_to_xr(prediction, y.coords)`` returns.
    """
    model = get_model(data)

    inputs, targets = data['train']
    model.fit(inputs, targets)

    # Predict on the same inputs the model was fitted to, then hand the raw
    # prediction and the target's coordinates to output_to_xr for unstacking.
    return output_to_xr(model.predict(inputs), targets.coords)



In [None]:
# Load the pickled dataset. joblib.load unpickles the file, so it should only
# be pointed at files from a trusted source.
data = joblib.load("../data/ml/ngaqua/data.pkl")


# Fit each model on data['train'] and keep what resid_from_data returns for it.
# NOTE(review): resid_from_data returns the model prediction, not a residual,
# so the `resid_` prefix is misleading.
# NOTE(review): the second variable says "mcr" but the factory is get_mca_mod;
# confirm which name is intended.
resid_lm = resid_from_data(get_linear_model, data)
resid_mcr = resid_from_data(get_mca_mod, data)

In [None]:
# Take the target half of the training pair and convert it with the same
# helper that resid_from_data applies to the model predictions.
_, y = data['train']
y = output_to_xr(y, y.coords)
# Bundle both predictions and the truth under the labels 'lm', 'true', 'mcr';
# presumably dict_to_xr concatenates them along a new "model" dimension (confirm).
# NOTE(review): this rebinds `data` from the loaded dict to the combined object,
# so after this cell data['train'] no longer refers to the loaded training pair.
# Re-running this cell or the fitting cell requires reloading first; a distinct
# name for the combined object would make the notebook re-runnable.
data = dict_to_xr({'lm': resid_lm, 'true': y, 'mcr': resid_mcr}, dim_name="model")

In [None]:
%%opts Curve[invert_axes=True] {+framewise}
lay = hv.Dataset(data.Q1c.isel(x=0,y=8))\
.to.curve("z", dynamic=True)\
.overlay("model")
lay

In [None]:
%%output dpi=150
%%opts Image[colorbar=True, width=400](cmap='viridis') 
%%opts GridSpace[shared_xaxis=True, shared_yaxis=True, xaxis=None]
hv.Dataset(data.Q1c.isel(y=8,x=120))\
.to.image(["time", "z"])\
.add_dimension("nil", 0, "")\
.grid()

We can see that MCR performs much worse than the linear regression, and that the raw linear regression actually performs pretty well. Even though the linear response function of the linear regression looks ugly, it does yield a good prediction.

In [None]:
# Scatter comparison of the "lm" and "true" entries of the "model" dimension of Q1c.
# NOTE(review): scatter_plot_z is a project helper; presumably it draws one
# panel per z level (confirm against lib.plots.model_evaluation).
scatter_plot_z(data.Q1c, "lm", "true", "model")

The linear model tends to underestimate the heating for large heating rates.