### Creation of data for Figure 6 (Molina, Gagne, and Prein; under review)

Imports

In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
from configpaths import dlproj_main, current_dl_models, future_dl_models

In [2]:
# Model identifier (kept as a string) that is interpolated into the result
# file names read below and into the name of the netCDF file written at the end.
model_num = '25'

In [3]:
def no_negs(data):
    """
    Replace every negative entry of ``data`` with 0 (for a 0-to-1 plot range).

    The replacement happens in place: ``data`` itself is modified and the
    very same object is handed back to the caller.
    """
    below_zero = data < 0
    data[below_zero] = 0
    return data

Load the permutation feature importance (PFI) results for each of the 20 variables under the current, future, and outlier cases; these are the metrics used for the performance diagrams.

In [5]:
def _load_pfi_csvs(pattern):
    """
    Read every CSV file matching ``pattern`` and stack the rows into one DataFrame.

    Matches are sorted because ``glob.glob`` returns them in arbitrary order;
    sorting keeps the row order (and therefore any later positional trimming
    of the assembled arrays) reproducible between runs.

    Parameters
    ----------
    pattern : str
        Glob pattern selecting the CSV files to read.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all matching files with a fresh integer index.

    Raises
    ------
    FileNotFoundError
        If no file matches ``pattern`` (instead of the opaque
        "No objects to concatenate" error raised by ``pd.concat``).
    """
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f'No PFI result files match: {pattern}')
    frames = [pd.read_csv(match, index_col=None, header=0) for match in matches]
    return pd.concat(frames, axis=0, ignore_index=True)


# Each dict maps the permuted-variable index (0-19) to its stacked results.
# The model directory follows ``model_num`` just like the file names do.
files_current = {
    var: _load_pfi_csvs(
        f'{current_dl_models}/model{model_num}/scalar_results_nomask_model{model_num}_random*_pfivar{var}_perm*.csv'
    )
    for var in range(20)
}

files_future = {
    var: _load_pfi_csvs(
        f'{future_dl_models}/scalar_results_nomask_model{model_num}_random*_pfivar{var}_perm*.csv'
    )
    for var in range(20)
}

# Outlier results are only read for the ``random1`` files.
files_out = {
    var: _load_pfi_csvs(
        f'{future_dl_models}/scalar_outresults_nomask_model{model_num}_random1_pfivar{var}_perm*.csv'
    )
    for var in range(20)
}

Load the original (unpermuted) error metrics used for the performance diagrams.

In [16]:
# Unpermuted scores for the current case: the ``random1`` ... ``random5``
# result files stacked row-wise. The model directory follows ``model_num``
# just like the file names do.
orig_c_r1 = pd.concat(
    [
        pd.read_csv(
            f'{current_dl_models}/model{model_num}/scalar_results_nomask_model{model_num}_random{rndm}.csv',
            index_col=None,
            header=0,
        )
        for rndm in range(1, 6)
    ],
    axis=0,
    ignore_index=True,
)

# Same five result files for the future case.
orig_f_r1 = pd.concat(
    [
        pd.read_csv(
            f'{future_dl_models}/scalar_results_nomask_model{model_num}_random{rndm}.csv',
            index_col=None,
            header=0,
        )
        for rndm in range(1, 6)
    ],
    axis=0,
    ignore_index=True,
)

# Outlier scores are only read from the single ``random1`` file.
filename = f'{future_dl_models}/scalar_outresults_nomask_model{model_num}_random1.csv'
orig_o_r1 = pd.read_csv(filename, index_col=None, header=0)

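Load the permutation feature importance (PFI) results for each of the 20 variables under the current, future, and outlier cases; these are the Brier skill score (BSS) results used for the attributes diagrams.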

In [19]:
def _load_bss_pfi_csvs(pattern):
    """
    Read every BSS CSV file matching ``pattern`` and stack the rows into one DataFrame.

    Matches are sorted because ``glob.glob`` returns them in arbitrary order;
    sorting keeps the row order (and therefore any later positional trimming
    of the assembled arrays) reproducible between runs.

    Parameters
    ----------
    pattern : str
        Glob pattern selecting the CSV files to read.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all matching files with a fresh integer index.

    Raises
    ------
    FileNotFoundError
        If no file matches ``pattern`` (instead of the opaque
        "No objects to concatenate" error raised by ``pd.concat``).
    """
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f'No BSS PFI result files match: {pattern}')
    frames = [pd.read_csv(match, index_col=None, header=0) for match in matches]
    return pd.concat(frames, axis=0, ignore_index=True)


# Each dict maps the permuted-variable index (0-19) to its stacked results.
# The model directory follows ``model_num`` just like the file names do.
bss_current = {
    var: _load_bss_pfi_csvs(
        f'{current_dl_models}/model{model_num}/bss_scalar_results_nomask_model{model_num}_random*_0.05_pfivar{var}_perm*.csv'
    )
    for var in range(20)
}

bss_future = {
    var: _load_bss_pfi_csvs(
        f'{future_dl_models}/bss_scalar_results_nomask_model{model_num}_random*_0.05_pfivar{var}_perm*.csv'
    )
    for var in range(20)
}

# Outlier results are only read for the ``random1`` files.
# NOTE(review): these file names carry a ``0.1`` suffix where the current and
# future files carry ``0.05`` -- confirm the difference is intended.
bss_out = {
    var: _load_bss_pfi_csvs(
        f'{future_dl_models}/bss_scalar_outresults_nomask_model{model_num}_random1_0.1_pfivar{var}_perm*.csv'
    )
    for var in range(20)
}

Load the original (unpermuted) error metrics used for the attributes diagrams.

In [21]:
# Unpermuted BSS results for the current case: the ``random1`` ... ``random5``
# files stacked row-wise. The model directory follows ``model_num`` just like
# the file names do.
bssorig_c_r1 = pd.concat(
    [
        pd.read_csv(
            f'{current_dl_models}/model{model_num}/bss_scalar_results_nomask_model{model_num}_random{rndm}_0.05.csv',
            index_col=None,
            header=0,
        )
        for rndm in range(1, 6)
    ],
    axis=0,
    ignore_index=True,
)

# Same five result files for the future case.
bssorig_f_r1 = pd.concat(
    [
        pd.read_csv(
            f'{future_dl_models}/bss_scalar_results_nomask_model{model_num}_random{rndm}_0.05.csv',
            index_col=None,
            header=0,
        )
        for rndm in range(1, 6)
    ],
    axis=0,
    ignore_index=True,
)

# Outlier BSS results are only read from the single ``random1`` file.
# NOTE(review): this file name carries a ``0.1`` suffix where the current and
# future files carry ``0.05`` -- confirm the difference is intended.
filename = f'{future_dl_models}/bss_scalar_outresults_nomask_model{model_num}_random1_0.1.csv'
bssorig_o_r1 = pd.read_csv(filename, index_col=None, header=0)

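Assemble, for each metric and case, an array of the change in score caused by permuting each variable (mean original score minus permuted score, with negative changes set to 0) for the box-and-whisker plots.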

In [23]:
def _score_drop(reference, permuted, metric):
    """
    Build the array of score changes caused by permuting each variable.

    Parameters
    ----------
    reference : pandas.DataFrame
        Unpermuted results; the mean of ``reference[metric]`` is the baseline.
    permuted : dict
        Maps the variable index (0-19) to a DataFrame of permuted results.
    metric : str
        Column to compare (``"AUC"``, ``"CSI"`` or ``"BSS"`` in this notebook).

    Returns
    -------
    numpy.ndarray
        One row per variable holding ``baseline - permuted score``, with
        negative differences replaced by 0 through ``no_negs``.
    """
    # The baseline does not depend on the variable, so compute it only once.
    baseline = reference[metric].mean()
    return no_negs(
        np.array([baseline - permuted[var][metric].values for var in range(20)])
    )


# Current case.
auc_current_ = _score_drop(orig_c_r1, files_current, "AUC")
csi_current_ = _score_drop(orig_c_r1, files_current, "CSI")
bss_current_ = _score_drop(bssorig_c_r1, bss_current, "BSS")

# Future case.
auc_future_ = _score_drop(orig_f_r1, files_future, "AUC")
csi_future_ = _score_drop(orig_f_r1, files_future, "CSI")
bss_future_ = _score_drop(bssorig_f_r1, bss_future, "BSS")

# Outlier case.
auc_outlier_ = _score_drop(orig_o_r1, files_out, "AUC")
csi_outlier_ = _score_drop(orig_o_r1, files_out, "CSI")
bss_outlier_ = _score_drop(bssorig_o_r1, bss_out, "BSS")

In [32]:
# Dimension labels: current and (trimmed) future arrays are declared on
# ('a', 'b'); the (trimmed) outlier arrays are declared on ('a', 'c').
current_future_dims = ['a', 'b']
outlier_dims = ['a', 'c']

# NOTE(review): the trailing-column trims below (5 for future, 1 for outlier)
# are hard-coded; presumably they equalise sample counts -- confirm.
data_to_save = xr.Dataset(
    data_vars={
        'auc_current_': (current_future_dims, auc_current_),
        'csi_current_': (current_future_dims, csi_current_),
        'bss_current_': (current_future_dims, bss_current_),
        'auc_future_': (current_future_dims, auc_future_[:, :-5]),
        'csi_future_': (current_future_dims, csi_future_[:, :-5]),
        'bss_future_': (current_future_dims, bss_future_[:, :-5]),
        'auc_outlier_': (outlier_dims, auc_outlier_[:, :-1]),
        'csi_outlier_': (outlier_dims, csi_outlier_[:, :-1]),
        'bss_outlier_': (outlier_dims, bss_outlier_[:, :-1]),
    }
)

In [35]:
# Write the assembled dataset to a netCDF file under ``dlproj_main``;
# the file name embeds the model identifier.
pfi_output_path = f'{dlproj_main}/model{model_num}_pfidata.nc'
data_to_save.to_netcdf(pfi_output_path)