In [None]:
# IPython magic: star-imports numpy and matplotlib.pyplot into the notebook
# namespace and enables inline figures. The bare `pcolormesh`, `figure` and
# `plt` names used in later cells are not imported anywhere else in the visible
# cells, so they rely on this line.
%pylab inline

In [None]:
import numpy as np
from gnl.xarray import xr2mat, xr, integrate

In [None]:

# --- configuration -----------------------------------------------------------
# Variables used as predictors (X) and as targets (Y) of the regression.
input_variables = ['qt', 'sl']
output_variables = ['q1c', 'q2']
filenames = ["wd/calc/q1.nc", "wd/calc/q2.nc", "wd/calc/sl.nc", "wd/calc/qt.nc"]
# Dimensions stacked into the sample axis / feature axis of the data matrices.
sample_dims = ('x', 'time')
feature_dims = ('z',)

stat_file = 'wd/stat.nc'

# --- load --------------------------------------------------------------------
# Open all input files as a single dataset.
D = xr.open_mfdataset(filenames)
# Subtract `tend` from `q1` to form the `q1c` target.
# NOTE(review): `tend` is not named in the file list above; presumably it is
# stored inside one of those files -- confirm.
D['q1c'] = D['q1'] - D['tend']

# Open the statistics file once and read both fields from the same handle
# (previously the file was opened twice, once per field).
stat = xr.open_dataset(stat_file)
rho = stat.RHO[-1]  # last entry along the leading dimension of RHO
p = stat.p

# --- weighting and centering -------------------------------------------------
# Weight passed to xr2mat.
# NOTE(review): sqrt(rho) presumably makes squared norms mass-weighted -- confirm.
weight = np.sqrt(rho)
# Remove the mean over the sample dimensions.
D_nomean = D - D.mean(sample_dims)

# --- data matrices -----------------------------------------------------------
# xr2mat returns a (matrix, scale) pair; scaling is disabled here.
X, x_scale = xr2mat(D_nomean[input_variables], sample_dims, feature_dims,
                    weight=weight, scale=False)
Y, y_scale = xr2mat(D_nomean[output_variables], sample_dims, feature_dims,
                    weight=weight, scale=False)
# Compute the data (load both matrices into memory).
X.load()
Y.load();

The condition number of $X$ is very large because there is a lot of collinearity in the solutions.

In [None]:
# Condition number of the input data matrix X (displayed as the cell output).
np.linalg.cond(X)

In [None]:
from sklearn.cross_decomposition import PLSRegression

In [None]:
# Fit a 4-component PLS regression of Y on X. scale=False turns off
# scikit-learn's own scaling of the inputs.
# NOTE(review): n_components=4 is hard-coded with no justification in the
# notebook -- consider moving it to a named constant.
mod = PLSRegression(n_components=4, scale=False)
mod.fit(X, Y)

In [None]:
# Predict on the training inputs, attach Y's coordinates to the raw array,
# then unstack both stacked axes and split the result by 'variable'.
Y_pred = (
    xr.DataArray(mod.predict(X), Y.coords)
    .unstack('samples')
    .unstack('features')
    .to_dataset('variable')
)

In [None]:
# Show the fitted PLS weight matrices as colour meshes, one figure each:
# first the x-weights, then (in a new figure) the y-weights.
pcolormesh(mod.x_weights_)
figure()
pcolormesh(mod.y_weights_)

In [None]:
import pandas as pd

# Name of the NetCDF file the PLS results are written to (the same path that
# is passed to save_pls at the end of this cell).
output_name = "out.nc"

def flatten_output(x):
    """Unstack the stacked axes of ``x`` and split it by ``'variable'``.

    Tries to unstack ``'features'`` and then ``'samples'``. A dimension whose
    ``unstack`` raises ``ValueError`` is left as it is. The result is then
    converted with ``to_dataset(dim='variable')``.

    Parameters
    ----------
    x : object providing ``unstack(dim)`` and ``to_dataset(dim=...)``.

    Returns
    -------
    Whatever ``to_dataset(dim='variable')`` returns for the unstacked object.
    """
    flat = x
    for stacked_dim in ('features', 'samples'):
        try:
            unstacked = flat.unstack(stacked_dim)
        except ValueError:
            # This axis could not be unstacked; keep the current object.
            continue
        flat = unstacked
    return flat.to_dataset(dim='variable')

def parse_pls_output(mod, X, Y):
    """Wrap the weights and prediction of a fitted PLS model as DataArrays.

    Parameters
    ----------
    mod : fitted model exposing ``n_components``, ``x_weights_``,
        ``y_weights_`` and ``predict``.
    X, Y : data matrices whose ``coords`` contain a ``'features'`` entry.

    Returns
    -------
    (xw, yw, y_pred) : the x-weights and y-weights, each labelled by the
        matching ``'features'`` coordinate and a component index ``m``, and
        the prediction ``mod.predict(X)`` labelled with ``Y.coords``.
    """
    # Index over the PLS components: 0 .. n_components - 1.
    component_index = pd.Index(range(mod.n_components), name='m')

    x_coords = (X.coords['features'], component_index)
    y_coords = (Y.coords['features'], component_index)

    xw = xr.DataArray(mod.x_weights_, x_coords, name="xw")
    yw = xr.DataArray(mod.y_weights_, y_coords, name="yw")

    predicted = mod.predict(X)
    y_pred = xr.DataArray(predicted, Y.coords, name="pred")

    return xw, yw, y_pred
    
def save_pls(mod, X, Y, output_name):
    """Write the PLS weights and prediction to one NetCDF file.

    The file gets three groups: ``x_weights``, ``y_weights`` and ``pred``.
    Each array is passed through ``flatten_output`` before being written.

    Parameters
    ----------
    mod : fitted PLS model, passed on to ``parse_pls_output``.
    X, Y : data matrices, passed on to ``parse_pls_output``.
    output_name : path of the NetCDF file to write.
    """
    xw, yw, y_pred = parse_pls_output(mod, X, Y)
    # The first write creates (or overwrites) the file; later writes append
    # their group to it.
    xw.pipe(flatten_output).to_netcdf(output_name, group="x_weights", mode="w")
    yw.pipe(flatten_output).to_netcdf(output_name, group="y_weights", mode="a")
    # Fix: the "pred" group used to be written from `xw`, so the prediction
    # was never saved and the x-weights were stored twice.
    y_pred.pipe(flatten_output).to_netcdf(output_name, group="pred", mode="a")
    
# Write the fitted model's weights and prediction to `output_name`, the path
# defined at the top of this cell, instead of repeating the literal here.
save_pls(mod, X, Y, output_name)

In [None]:
# Plot the predicted q2 at the first x index.
Y_pred.isel(x=0).q2.plot()

In [None]:
# Integrate the predicted q1c over z after adding back the sample mean (the
# model was fit on demeaned data), then divide by 2488.
# NOTE(review): 2488 is an unexplained magic number -- presumably a
# unit-conversion factor turning the integrated heating into a precipitation
# rate; confirm and give it a named constant.
# NOTE(review): Y was built by xr2mat with weight=weight, so Y_pred may still
# carry that weighting while the mean added here is unweighted -- verify.
p0 = integrate(Y_pred.q1c + D.q1c.mean(sample_dims), 'z')/(2488)

In [None]:
# Map of p0 with x on the horizontal axis and time on the vertical axis, drawn
# in a tall, narrow (3 x 6) figure.
plt.figure(figsize=(3,6))
p0.plot(x='x', y='time')

In [None]:
import gnl.plots

In [None]:
# Histogram of all p0 values, flattened to 1-D, drawn with gnl's loghist
# helper. NOTE(review): presumably a log-scaled histogram, judging by the
# name -- confirm against gnl.plots.
gnl.plots.loghist(p0.values.ravel())

This shows that the predicted precipitation does not have the typical fat-tailed characteristics of realistic precipitation. This indicates that a linear model---even if it is a good one---cannot possibly reproduce the observed non-Gaussianity, even for coarse grids.

## Linear response functions

In [None]:
# Load the array stored under the key 'arr_0' in the .npz archive. The context
# manager closes the archive's file handle once the array has been read
# (previously the NpzFile was left open).
with np.load("wd/calc/lrf.npz") as lrf_archive:
    lrf = lrf_archive['arr_0']