In [None]:
import sys
import matplotlib.pyplot as plt
import numpy as np
from pandas import Index
import pandas as pd
from scipy.stats import pearsonr, gaussian_kde

class numeric:
    """Stand-in for the ``pandas.core.indexes.numeric`` module.

    Exposes ``Int64Index`` as an alias of ``pandas.Index`` so that the dotted
    name ``pandas.core.indexes.numeric.Int64Index`` can still be resolved when
    the real module is not available (presumably needed by the
    ``pd.read_pickle`` call on the latin-cube tables -- TODO confirm).
    """
    Int64Index = Index

# Register the stand-in only if no module of that name is loaded yet.
# ``setdefault`` leaves a genuine ``pandas.core.indexes.numeric`` (already in
# ``sys.modules`` after ``import pandas`` on releases that still ship it)
# untouched instead of replacing it with this one-attribute shim, which would
# hide every other name that module provides.
sys.modules.setdefault('pandas.core.indexes.numeric', numeric)


In [None]:
# Folder that receives every figure written by this notebook.
odir = '../tuning_paper_figures'

# One result folder per configuration; paired element-wise with cube_sizes.
idirs = ['../tuning_paper_figures/cfg01', '../tuning_paper_figures/cfg02']

# Parameter name -> math-text label used for axis labels and table rows.
label_name_map = dict(
    compression_factor='$P_0$',
    C_lab='$c_\\text{ref}$',
    exponent_compression_factor='$H$',
    compaction_param='$C$',
    ERA5_quad_drag_coef_air='$C_A$',
    nu0='$\\nu$',
    tan_phi='$\\mu$',
)

# Number of leading latin-cube rows kept for each entry of idirs.
cube_sizes = [50, 70]

In [None]:
# printf-style number formats, one per parameter. Nothing in this cell reads
# them; the name is kept in case other cells of the notebook rely on it.
epsilon_precision = dict(
    compression_factor = "%0.1f",
    C_lab = "%0.1f",
    nu0 = "%0.3f",
    tan_phi = "%0.3f",
    exponent_compression_factor = "%0.2f",
    compaction_param = "%0.1f",
    ERA5_quad_drag_coef_air = "%0.5f",
)

# Scatter plots of NN predictions against the reference labels of the test
# set: one figure per configuration folder, one panel per parameter.
# (Pearson r for the NN and LR models is reported by the table cell at the end
# of the notebook, so no metrics are computed here.)
for idir in idirs:
    ofile = f'{odir}/fig00_scatterplot_nn_acuracy_{idir.split("/")[-1]}.png'
    print(ofile)

    # Open the archive once and let the context manager close it again.
    # The entries are pickled dicts (hence allow_pickle and .item()); unpickling
    # can execute arbitrary code, so only load archives from a trusted source.
    with np.load(f'{idir}/nn_training.npz', allow_pickle=True) as nn_npz:
        rgps_pred_params = nn_npz['rgps_pred_params'].item()
        test_pred_params = nn_npz['test_pred_params'].item()
        test_labe_params = nn_npz['test_labe_params'].item()

    param_names = list(rgps_pred_params.keys())
    print(param_names)

    fig, axs = plt.subplots(1,5, figsize=(20,4))

    for i, param_name in enumerate(param_names):
        labe = np.hstack(test_labe_params[param_name])
        pred = np.hstack(test_pred_params[param_name])

        axs[i].plot(labe, pred, '.', alpha=0.1)
        # 1:1 reference line across the label range.
        axs[i].plot([labe.min(), labe.max()], [labe.min(), labe.max()], 'k-')
        # x carries the labels and y the predictions, so the label range goes
        # on x and the outlier-trimmed prediction range (0.5-99.5 percentile)
        # on y. The limits used to be applied to the opposite axes.
        axs[i].set_xlim([labe.min(), labe.max()])
        axs[i].set_ylim(np.percentile(pred, [0.5, 99.5]))
        axs[i].set_xlabel(f'{label_name_map[param_name]}', fontsize=16)
        # Rotate the tick labels of every panel; plt.xticks(rotation=...) only
        # acts on the current axes, i.e. the last (possibly hidden) panel.
        axs[i].tick_params(axis='x', labelrotation=30)

    axs[0].set_ylabel('Retrieved')
    # Hide the panels that received no parameter (same criterion as the
    # histogram cell, which keys on the number of parameters).
    for unused_ax in axs[len(param_names):]:
        unused_ax.set_axis_off()
    plt.tight_layout()
    plt.savefig(ofile, dpi=150, bbox_inches='tight', pad_inches=0.1)
    plt.show()
    # Release the figure so looping over many folders does not accumulate them.
    plt.close(fig)


In [None]:
# Fixed x-axis ranges for the parameters listed here; other panels autoscale.
xlims = dict(
    compression_factor = [0, 20000],
    C_lab = [0, 2.9e6],
)

# printf-style number formats per parameter. Not used by the plots in this
# cell; the table cell at the end of the notebook reads this dict.
precision = dict(
    compression_factor = "%0.1f",
    C_lab = "%0.1f",
    nu0 = "%0.3f",
    tan_phi = "%0.2f",
    exponent_compression_factor = "%1.2f",
    compaction_param = "%1.1f",
    ERA5_quad_drag_coef_air = "%0.5f",
)

# Scalar handed to gaussian_kde as bw_method for all density estimates.
kde_factor = 0.3
lbls = ['input', 'NN', 'LR']

# Per configuration: compare the distribution of the input latin-cube values
# with the distributions of the 'rgps_pred_params' predictions of the NN and
# LR models, one panel per parameter.
for idir, cube_size in zip(idirs, cube_sizes):
    exp_no = idir.split("/")[-1][-1]
    cube_file = f'../tuning_paper_figures/latin_cube_{exp_no}.df'
    # NOTE: read_pickle (like allow_pickle below) can execute arbitrary code
    # while loading; only use it on files from a trusted source.
    cube_df = pd.read_pickle(cube_file)[:cube_size]
    ofile = f'{odir}/fig00_histogram_nn_lr_{idir.split("/")[-1]}.png'
    print(ofile)

    # Only the 'rgps_pred_params' entry of each archive is plotted; open each
    # file once and close it again via the context manager.
    with np.load(f'{idir}/nn_training.npz', allow_pickle=True) as nn_npz:
        rgps_pred_params = nn_npz['rgps_pred_params'].item()
    with np.load(f'{idir}/lr_training.npz', allow_pickle=True) as lr_npz:
        rgps_lr_params = lr_npz['rgps_pred_params'].item()

    param_names = list(rgps_pred_params.keys())

    fig, axs = plt.subplots(1,5, figsize=(17.5, 3.5))
    for i, param_name in enumerate(param_names):
        labe = cube_df[param_name].to_numpy()
        rgps = np.hstack(rgps_pred_params[param_name])
        rgps_lr = np.hstack(rgps_lr_params[param_name])

        for a, lbl in zip([labe, rgps, rgps_lr], lbls):
            # Evaluate each density on 50 points spanning its own sample range.
            points = np.linspace(a.min(), a.max(), 50)
            kde = gaussian_kde(a, bw_method=kde_factor)
            axs[i].plot(points, kde(points), '-', label=lbl)
        axs[i].set_xlabel(f'{label_name_map[param_name]}', fontsize=16)
        # Rotate the tick labels of every panel; plt.xticks(rotation=...) only
        # acts on the current axes, i.e. the last (possibly hidden) panel.
        axs[i].tick_params(axis='x', labelrotation=30)
        if param_name in xlims:
            axs[i].set_xlim(xlims[param_name])

    axs[0].legend()
    # Hide every panel that received no parameter.
    for unused_ax in axs[len(param_names):]:
        unused_ax.set_axis_off()
    plt.tight_layout()
    plt.savefig(ofile, dpi=150, bbox_inches='tight', pad_inches=0.1)
    plt.show()
    # Release the figure so looping over many folders does not accumulate them.
    plt.close(fig)


In [None]:
# Print one LaTeX table row per (parameter, method):
#   label & configuration digit & method & Pearson r (test set) & mean of the
#   'rgps_pred_params' predictions \\
# Relies on `precision` (defined in the histogram cell) for the number format
# of the last column.
methods = ['lr', 'nn']
archive_keys = ('rgps_pred_params', 'test_pred_params', 'test_labe_params')
for idir in idirs:
    conf_name = idir.split('/')[-1][-1]

    # The row order follows the parameter order of the NN archive.
    # NOTE: allow_pickle unpickles arbitrary objects; trusted files only.
    with np.load(f'{idir}/nn_training.npz', allow_pickle=True) as nn_npz:
        param_names = list(nn_npz['rgps_pred_params'].item().keys())

    # Read each method's archive once per folder (instead of three times per
    # parameter) and close it again via the context manager.
    loaded = {}
    for method in methods:
        with np.load(f'{idir}/{method}_training.npz', allow_pickle=True) as npz:
            loaded[method] = {key: npz[key].item() for key in archive_keys}

    for param_name in param_names:
        for method in methods:
            rgps = np.hstack(loaded[method]['rgps_pred_params'][param_name])
            labe = np.hstack(loaded[method]['test_labe_params'][param_name])
            pred = np.hstack(loaded[method]['test_pred_params'][param_name])

            # pearsonr returns (correlation coefficient, p-value).
            r_value, p_value = pearsonr(labe, pred)
            # Format the mean on its own so a '%' in any other column can never
            # be mistaken for a conversion specifier.
            mean_str = precision[param_name] % np.mean(rgps)
            # '0.2f' = two decimals; the former '0.2' (no type character) meant
            # two significant digits and gave uneven column widths.
            table_line = f'{label_name_map[param_name]} & {conf_name} & {method.upper()} & {r_value:0.2f} & {mean_str} \\\\'
            print(table_line)
