I will develop some nice visualizations of the MCA modes in this notebook.

First some header information:

In [None]:
# Record the commit hash and working-tree status the notebook was run against.
!git rev-parse HEAD
!git status

In [None]:
from sklearn.externals import joblib
import xarray as xr
import pandas as pd
import numpy as np


import matplotlib.pyplot as plt
%matplotlib inline


import holoviews as hv
import datashader as ds
from holoviews.operation.datashader import aggregate, shade, datashade, dynspread
hv.extension('bokeh')

In [None]:
from lib.util import mat_to_xarray

def transformed_to_xarray(mat, samples_idx):
    """Wrap a matrix of MCA scores in a DataArray and unstack its sample axis.

    Axis 0 of ``mat`` is labelled with ``samples_idx`` through
    ``mat_to_xarray``; the resulting ``dim_0`` dimension is then unstacked.
    """
    axis_coords = {0: samples_idx}
    stacked = mat_to_xarray(mat, axis_coords)
    return stacked.unstack('dim_0')

In [None]:
# Fitted MCA results (the 'transformed' and 'modes' entries are used below).
mca_output = joblib.load("../data/ml/ngaqua/mca.pkl")

# The sample index is taken from the training split of the ML data set.
data = joblib.load("../data/ml/ngaqua/data.pkl")
samples_idx = data['train'][1].indexes['samples']

# Entry 0 of 'transformed' holds the input scores, entry 1 the output scores.
input_scores, output_scores = [
    transformed_to_xarray(mca_output['transformed'][k], samples_idx)
    for k in (0, 1)
]

## Zonal Hovmoller Diagrams


In [None]:
def plot_zonal_hovmoller(input_scores, output_scores):
    """Build Hovmoller (x vs. time) images of the y-averaged MCA scores.

    Both score arrays are averaged over ``y``. The ``x`` coordinate is divided
    by 1e6 (labelled "1000 km", so x is presumably in metres -- confirm) and
    ``dim_1`` is shifted by one so that modes are numbered from 1.

    Returns a HoloViews layout with the input-score image ("SL, QT") next to
    the output-score image ("Q1, Q2").
    """

    def to_panel(scores, var_name, label):
        # Meridional mean, wrapped as a one-variable dataset for HoloViews.
        averaged = scores.mean('y').to_dataset(name=var_name)
        averaged = averaged.assign(x=averaged.x/1e6, dim_1=averaged.dim_1+1)
        return hv.Dataset(averaged).to.image(kdims=["x", "time"], label=label)

    layout = (to_panel(input_scores, 'X', "SL, QT")
              + to_panel(output_scores, 'Y', "Q1, Q2"))
    layout = layout.redim.label(dim_1="Mode")
    return layout.redim.unit(x="1000 km", time="d")


In [None]:
%%opts Image[width=200, height=400](cmap='RdBu') {+framewise}

# Display the Hovmoller layout for the input and output scores loaded above.
plot_zonal_hovmoller(input_scores, output_scores)

Unfortunately, these Hovmoller diagrams are not that informative. It looks like there are some large eastward moving disturbances, but that is about all the insight I can get from these plots.

Why are the values on the right so strongly positive and negative? (Or is this just a plotting artifact?)

# Time Series

In [None]:
def plot_ts(x=0,y=0,dim_1=0):
    """Stack the input- and output-score time series for one grid point and mode.

    ``x`` and ``y`` are positional indices into the global ``input_scores`` and
    ``output_scores`` arrays. ``dim_1`` is a 1-based mode number: it is
    converted to a 0-based index by subtracting one, so the default of 0 maps
    to index -1, i.e. the last mode.

    Returns a single-column HoloViews layout with the "X" curve above the "Y"
    curve.
    """
    selection = dict(x=x, y=y, dim_1=dim_1-1)

    def score_curve(scores, name):
        picked = scores.to_dataset(name=name).isel(**selection)
        return hv.Dataset(picked).to.curve("time")

    layout = score_curve(input_scores, "X") + score_curve(output_scores, "Y")
    return layout.cols(1)

In [None]:
%%opts Curve[width=600, height=200] {+framewise}

dmap = (hv.DynamicMap(plot_ts, kdims=["x", "y", "dim_1"])
.redim.range(x=(0, 10), y=(0,16), dim_1=(1,4))
.redim(dim_1="Mode"))
dmap

In [None]:
%%output filename="../docs/images/mca_ts" fig="png"
# Key order follows the DynamicMap kdims: x index 0, y index 8, mode 1.
dmap[0, 8,1]

The most obvious feature of these diagrams is that the input scores have much longer time-scales than the output scores do. However, the input scores do correlate with the output scores.

Here is a scatter plot of the first x score versus the first y score. Because there are about 100,000 points, I will use datashader to make the scatter plot.

In [None]:
%%opts RGB[width=400, height=400]

pts = dynspread(
    datashade(hv.Scatter((input_scores[...,0].values.ravel(),
                          output_scores[...,0].values.ravel())),
              cmap='blue'))
pts.redim.range(y=(-100,100))

# Vertical Structure of the modes

In [None]:

from xnoah.data_matrix import unstack_cat
# Apply the seaborn style sheet to the matplotlib figures below.
# NOTE(review): matplotlib >= 3.6 renamed this style to 'seaborn-v0_8' -- confirm against the installed version.
plt.style.use('seaborn')


# scale modes by the largest-magnitude value of qt between 0 and 8000 m
def get_scale(qt):
    """Return one signed scale factor per mode from the ``qt`` profiles.

    For every entry along ``dim_0`` the level where ``|qt|`` is largest within
    ``z`` in [0, 8000] is located, and the *signed* value of ``qt`` at that
    level is returned. Dividing a mode by its factor therefore gives it a
    value of +1 at that level.

    Parameters
    ----------
    qt : xarray.DataArray
        Two-dimensional array with dimensions ``dim_0`` (mode) and ``z``.

    Returns
    -------
    xarray.DataArray
        One-dimensional array along ``dim_0`` carrying the ``dim_0`` labels of
        ``qt``.
    """
    # Search and look up on the same 0-8000 m slab, so the argmax positions
    # are never applied to an array with a different z offset.
    layer = qt.sel(z=slice(0, 8000)).transpose('dim_0', 'z')

    # xarray's argmax keeps its NaN handling; one position per mode.
    peak = np.abs(layer).argmax('z').values

    # Plain NumPy pointwise lookup replaces `isel_points`, which current
    # xarray releases no longer provide, and works for any number of modes.
    signed_peak = layer.values[np.arange(peak.size), peak]

    return xr.DataArray(signed_peak,
                        coords={'dim_0': qt['dim_0'].values},
                        dims=['dim_0'])

def plot_scaled_modes(mca_output):
    """Plot the vertical profiles of the first two scaled MCA modes.

    The input and output modes in ``mca_output['modes']`` are unstacked along
    'features' and both divided by the factor from ``get_scale`` applied to
    the input ``qt`` profiles. A 2x2 grid is drawn on the current matplotlib
    state: one row per mode, input variables (qt, sl) on the left and output
    variables (Q1c, Q2) on the right, with height in km on the shared y axis.
    """
    # Unstack the concatenated feature axis of both mode sets.
    x_modes, y_modes = (unstack_cat(m, 'features') for m in mca_output['modes'])

    # Both sides are normalised by the same qt-based factor.
    norm = get_scale(x_modes.qt)
    x_modes, y_modes = x_modes/norm, y_modes/norm

    _, axes = plt.subplots(2,2, figsize=(4,6), sharey=True)

    for row, (left, right) in enumerate(axes):
        inputs = x_modes.isel(dim_0=row)
        outputs = y_modes.isel(dim_0=row)
        height_km = inputs.z/1e3

        # Left column: input variables.
        left.plot(inputs.qt, height_km, label="QT [g/kg]")
        left.plot(inputs.sl, height_km, label="SL [g/kg]")
        left.legend()
        left.set_xticks([-.5,0, .5, 1.0])
        left.set_title(f"Mode {row+1}")
        left.set_ylabel('z [km]')

        # Right column: output variables.
        right.plot(outputs.Q1c, height_km, label="Q1c [K/d]")
        right.plot(outputs.Q2, height_km, label="Q2 [K/d]")
        right.legend()

    plt.tight_layout()
    
    
# Draw the scaled mode profiles, then write the current figure to the docs folder.
plot_scaled_modes(mca_output)
plt.savefig("../docs/images/mca_modes.pdf")

Convection shallows (deepens) when the sign of mode 2 is positive (negative).