# Test the significance of MA-ML improvements over the base MA

In [7]:
import os
import sys
import json
import xarray as xr
from scipy.stats import permutation_test

# Make the project's local modules under ../src importable from this notebook.
# The absolute path is appended only once, so re-running the cell is harmless.
module_path = os.path.abspath('../src')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from utils import *
from analog import *
from resample import *

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Prepare data

In [8]:
# Experiment settings: which run to evaluate and where its files live.
exp = 'test'
test_data = 'test'            # key into hp.periods selecting the target period
out_dir = f'../output/{exp}'
data_dir = '../data/cesm2'

# Variable to evaluate and the forecast leads (3, 6, 9, 12).
vname = 'sst'
leads = np.arange(3, 15, 3)

# Resampling settings forwarded to the permutation helpers.
n_resamples = 10000           # Number of resamples
batch = n_resamples // 50     # same value as int(n_resamples/50)

# Hyperparameters saved with the experiment, wrapped in DotDict for the
# attribute-style access (hp.month, hp.periods, ...) used in this notebook.
with open(f'{out_dir}/hyperparameters.json', 'r') as f:
    hp = DotDict(json.load(f))

# Periods taken from the hyperparameters: 'library' is the training period
# (passed to get_af_month below), 'target' is the period named by test_data.
periods = dict(
    library=hp.periods['train'],
    target=hp.periods[test_data],
)

# Pick the epoch with the lowest validation MSE.
# NOTE(review): argmin() yields the row position, not the index label — confirm
# that the row position in history.csv matches the epoch number used in file names.
history = pd.read_csv(f'{out_dir}/history.csv', index_col=0)
epoch = history['val_mse'].argmin()

In [9]:
# Load the anomaly field used for analog forecasting.
# 'pr' is read from the 2.5x2.5 grid file; every other variable from the 2x2 one.
grid = '2.5x2.5' if vname == 'pr' else '2x2'

f = f'{data_dir}/{vname}_anomaly_{grid}.nc'
da = xr.open_dataarray(f)

# Optional spatial subset (disabled):
# da = da.sel(lat=slice(-10, 10), lon=slice(120, 290))

In [10]:
# Base MA: build the reference forecasts from the index file in ../output/MA.
print('Base MA')
f = f'../output/MA/ma_index_{test_data}.nc'
ma_idx = xr.open_dataarray(f).sel(month=hp.month)   # keep only the month under study

af_base = get_af_month(
    da,
    hp.month,
    periods['library'],
    ma_idx,
    hp.n_analog,
    leads,
)
# Average over the 'analog' dimension.
afm_base = af_base.mean(dim='analog')

Base MA


In [11]:
# MA-ML experiment: forecasts from the index file saved at the selected epoch.
print(f'{exp} at epoch {epoch}')
f = f'{out_dir}/ma_index_{test_data}_epoch{epoch}.nc'
# NOTE(review): unlike the base-MA cell, no .sel(month=hp.month) is applied
# here — presumably this file already holds a single month; confirm.
ma_idx = xr.open_dataarray(f)

af = get_af_month(
    da,
    hp.month,
    periods['library'],
    ma_idx,
    hp.n_analog,
    leads,
)
# Average over the 'analog' dimension.
afm = af.mean(dim='analog')

ch256_sst_ssh_taux_scaled_month01_lr1.5e-05-0 at epoch 45


# Permutation of ACU difference

In [12]:
%%time
# Permutation test on the ACU difference between the MA-ML forecast (afm)
# and the base-MA forecast (afm_base).
_acu_opts = dict(month=hp.month, n_resamples=n_resamples, batch=batch)
t_acu_diff = permute_acu_diff(da, afm, afm_base, ['ens', 'year'], **_acu_opts)

ACU diff: 3-month lead


In [20]:
# Write the ACU-difference result to NetCDF, storing every variable as float32.
outf = f'{out_dir}/{vname}_t_acu_diff_{test_data}_epoch{epoch}.nc'
encoding = {name: {'dtype': 'float32'} for name in t_acu_diff.keys()}
t_acu_diff.to_netcdf(outf, encoding=encoding)
print(outf)

../output/ch256_sst_ssh_taux_scaled_month01_lr1.5e-05-0/pr_t_acu_diff_test_epoch45.nc


# Permutation of NRMSE skill

In [6]:
%%time
# Permutation test on the NRMSE of the MA-ML forecast (afm) against the
# base-MA forecast (afm_base).
_nrmse_opts = dict(month=hp.month, n_resamples=n_resamples, batch=batch)
t_nrmse = permute_nrmse(da, afm, afm_base, ['ens', 'year'], **_nrmse_opts)

NRMSE: 3-month lead
NRMSE: 6-month lead
NRMSE: 9-month lead
NRMSE: 12-month lead
CPU times: user 19.5 s, sys: 3.6 s, total: 23.1 s
Wall time: 23.2 s


In [22]:
# Write the NRMSE result to NetCDF, storing every variable as float32.
outf = f'{out_dir}/{vname}_t_nrmse_{test_data}_epoch{epoch}.nc'
encoding = {name: {'dtype': 'float32'} for name in t_nrmse.keys()}
t_nrmse.to_netcdf(outf, encoding=encoding)
print(outf)

../output/ch256_sst_ssh_taux_scaled_month01_lr1.5e-05-0/pr_t_nrmse_test_epoch45.nc
