In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
import xarray as xr
import numpy as np
import pandas as pd

from sklearn.externals import joblib
import holoviews as hv
from lib.models import plot_lrf
# Load the HoloViews 'bokeh' extension so elements built below render with it.
hv.extension('bokeh')

# Visualizing LRFs

In [None]:
def get_lrf(lm_data):
    """Wrap the linear-model matrix in a labelled ``DataFrame``.

    Parameters
    ----------
    lm_data : mapping
        Must provide ``'mat'`` (the 2-D matrix) and ``'features'``, itself a
        mapping whose ``'in'`` and ``'out'`` entries become the row and
        column labels respectively.

    Returns
    -------
    pandas.DataFrame
        ``lm_data['mat']`` with input features as the index and output
        features as the columns.
    """
    matrix = lm_data['mat']
    features = lm_data['features']
    return pd.DataFrame(
        data=matrix,
        index=features['in'],
        columns=features['out'],
    )

In [None]:
# Read the data array stored in w.nc and keep it as a plain NumPy array.
# NOTE(review): `w` is not referenced by any other cell visible in this
# notebook — confirm it is still needed.
w = np.asarray(xr.open_dataarray("../data/processed/ngaqua/w.nc"))


# Load the pickled linear model and wrap its matrix in a labelled DataFrame.
lm_data = joblib.load("../data/ml/ngaqua/linear_model.pkl")
lrf = get_lrf(lm_data)

# `p` is the variable named "p" in stat.nc; it is handed to the plotting
# helpers as the axis coordinate (presumably pressure — TODO confirm).
p = xr.open_dataset("../data/ngaqua/stat.nc").p
# The trailing `;` suppresses the repr of the returned object.
# NOTE(review): a notebook-local `plot_lrf` is defined further down and
# shadows the `lib.models` import once it has run — confirm which
# implementation this call is meant to reach.
plot_lrf(lrf, p, input_vars=['QT', 'SL', 'LHF', 'SHF'],
         output_vars=['Q1c', 'Q2']);

def myquad(key, p, **kwargs):
    """Build a HoloViews ``QuadMesh`` for one 2-D pane of the LRF.

    The previous version accepted a single positional argument, ignored it,
    and read ``lrf_pane``/``quadmesh`` from names that are not defined in this
    notebook, while ``make_row`` calls it as ``myquad(pane, p, ...)``. The
    signature now matches those call sites.

    Parameters
    ----------
    key : 2-D array-like
        The pane to draw, of shape ``(m, n)``. The parameter keeps its
        original name; callers pass the pane itself.
    p : sliceable sequence
        Coordinate values. The first ``n`` entries are used for the x axis
        and the first ``m`` entries for the y axis.
    **kwargs
        Forwarded unchanged to ``hv.QuadMesh`` (e.g. ``label``, ``group``).

    Returns
    -------
    holoviews.QuadMesh
    """
    lrf_pane = key
    m, n = lrf_pane.shape
    # QuadMesh takes an (xs, ys, zs) tuple; zs is (m, n), so xs needs n
    # entries and ys needs m.
    return hv.QuadMesh((p[:n], p[:m], lrf_pane), **kwargs)


def make_row(output_var, lrf, p):
    """Assemble the three panels shown for a single output variable.

    Parameters
    ----------
    output_var : hashable
        Output variable name; used as the first element of the ``lrf`` keys
        and as the HoloViews ``group`` of every element.
    lrf : indexable
        Looked up with ``(output_var, name)`` tuples for ``name`` in
        ``'qt'``, ``'sl'``, ``'shf'`` and ``'lhf'``. The ``'shf'``/``'lhf'``
        entries are additionally indexed with ``[0, :]``.
    p : sliceable sequence
        Coordinate passed through to ``myquad``.

    Returns
    -------
    A HoloViews layout: QT mesh + SL mesh + (SHF curve overlaid with LHF).
    """
    qt_mesh = myquad(lrf[(output_var, 'qt')], p, label="QT", group=output_var)
    sl_mesh = myquad(lrf[(output_var, 'sl')], p, label="SL", group=output_var)

    shf_curve = hv.Curve(lrf[(output_var, 'shf')][0, :], label="SHF", group=output_var)
    lhf_curve = hv.Curve(lrf[(output_var, 'lhf')][0, :], label="LHF", group=output_var)

    # `*` overlays the two curves in one panel; `+` places panels side by side.
    flux_overlay = shf_curve * lhf_curve
    return qt_mesh + sl_mesh + flux_overlay


# Display options keyed by HoloViews element type, then by option group
# ('plot' / 'style' / 'norm'). Applied to a layout via `.opts(opts)`.
opts = dict(
    Curve=dict(
        plot={'invert_axes': True, 'width': 200},
        norm={'axiswise': True},
    ),
    QuadMesh=dict(
        plot={
            'colorbar': True,
            'invert_yaxis': True,
            'invert_xaxis': True,
            'invert_axes': True,
        },
        style={'cmap': 'viridis'},
        norm={'axiswise': True},
    ),
)


def plot_lrf(lrf, p, input_vars=None, output_vars=None):
    """Lay out one row of LRF panels per output variable.

    This notebook-local definition shadows ``plot_lrf`` imported from
    ``lib.models``. Other cells call ``plot_lrf`` with ``input_vars=`` and
    ``output_vars=`` keywords, which the former two-argument signature
    rejected with ``TypeError`` once this definition had run. Both keywords
    are now accepted; calling ``plot_lrf(lrf, p)`` behaves as before.

    Parameters
    ----------
    lrf : indexable
        Passed to ``make_row``; see that function for the expected keys.
    p : sliceable sequence
        Coordinate passed to ``make_row``.
    input_vars : iterable of str, optional
        If given, must name (case-insensitively) exactly the inputs that
        ``make_row`` draws: QT, SL, LHF and SHF. ``make_row`` has a fixed set
        of panels, so any other selection is rejected rather than ignored.
    output_vars : iterable of str, optional
        Output variables to draw, one row each. Defaults to
        ``('Q1c', 'Q2')``.

    Returns
    -------
    A HoloViews layout with three columns and ``opts`` applied.

    Raises
    ------
    ValueError
        If ``input_vars`` is not the supported set, or ``output_vars`` is
        empty.
    """
    supported_inputs = {'qt', 'sl', 'lhf', 'shf'}
    if input_vars is not None:
        requested = {str(name).lower() for name in input_vars}
        if requested != supported_inputs:
            raise ValueError(
                "input_vars must be exactly QT, SL, LHF and SHF; got %r"
                % (list(input_vars),)
            )

    rows = ['Q1c', 'Q2'] if output_vars is None else list(output_vars)
    if not rows:
        raise ValueError("output_vars must name at least one output variable")

    layout = make_row(rows[0], lrf, p)
    for name in rows[1:]:
        layout = layout + make_row(name, lrf, p)
    # Each row contributes three panels, hence three columns.
    return layout.cols(3).opts(opts)
    


# Draw the layout for the `lrf` and `p` defined earlier in this notebook.
plot_lrf(lrf, p)

## First weighting the LRF by the projection operator

This is achieved by computing $\phi^{\dagger} \phi M$, where $M$ is the LRF matrix, $\phi$ has shape (n_components, n_features), and $\phi^{\dagger}$ is its pseudo-inverse after weighting and scaling.

In [None]:
# Pickled artifacts: the data set, the MCA result and the fitted linear model.
data = joblib.load("../data/ml/ngaqua/data.pkl")
mca_data = joblib.load("../data/ml/ngaqua/mca.pkl")
lm_data = joblib.load("../data/ml/ngaqua/linear_model.pkl")

# Row (input) and column (output) labels of the linear model.
feats = lm_data['features']['in']
outs = lm_data['features']['out']

# Labelled DataFrames for the two linear operators.
# `get_lrf` builds the same frame as the inline constructor it replaces:
# the model matrix indexed by `feats` with columns `outs`.
lrf = get_lrf(lm_data)
# The projection is labelled by the input features on both axes.
proj = pd.DataFrame(
    data=mca_data['projection'],
    index=feats,
    columns=feats,
)

## Projection operator

Here is a messy plot of the projection operator. Note that the variables cannot be projected independently because this operator applies to the whole feature vector. Ideally this would look like a smoothed version of the identity matrix.

In [None]:
%%opts Raster[colorbar=True](cmap='viridis') Curve[invert_axes=True width=150] {+axiswise}
_p = np.asarray(proj)
hv.Raster(_p[:,:-2]) + hv.Curve(_p[:,-1],) * hv.Curve(_p[:,-2])

## Projected LRF

In [None]:
# Matrix product of the projection operator with the LRF, taken on the raw
# arrays and then relabelled with the input features (rows) and outputs
# (columns).
lrf_proj = pd.DataFrame(
    np.matmul(np.asarray(proj), np.asarray(lrf)),
    index=feats,
    columns=outs,
)

In [None]:
# Plot the projected LRF; the trailing `;` suppresses the returned repr.
# NOTE(review): a notebook-local `plot_lrf` is also defined in an earlier
# cell and shadows the `lib.models` import once that cell has run — confirm
# which implementation this keyword-style call is meant to reach.
plot_lrf(lrf_proj, p, input_vars=['QT', 'SL', 'LHF', 'SHF'],
         output_vars=['Q1c', 'Q2']);

I am not exactly sure why the units on the right are so different. Maybe it's because they were not scaled properly in the MCA.