# Calculate forecast skills of MA

In [1]:
import os
import sys
import json
import numpy as np
import pandas as pd
import xarray as xr

# Put the absolute path of ../src on sys.path (once) so that modules located
# there can be imported by the statements that follow.
module_path = os.path.abspath('../src')
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)
    
from analog import *
from eval_pred import *
from utils import DotDict, nino_indices

%load_ext autoreload
%autoreload 2

In [2]:
# ---- Run configuration ----
test_data = 'real'        # tag embedded in the parameter/index/output file names
out_dir = '../output/MA'  # directory holding the parameter file and the results
n_analog = 30             # passed to get_af when building the analog forecasts
vname = 'sst'             # variable to evaluate ('pr' selects the alternate settings)

# Latitude / longitude bounds of the region used for the spatial skill scores
lat_slice = (-10, 10)
lon_slice = (120, 290)

# The remaining settings live in a JSON file inside the output directory.
# They are wrapped in the project's DotDict so later cells can read them
# with attribute syntax (e.g. param.data_dir).
with open(f'{out_dir}/param_{test_data}.json', 'r') as f:
    param = DotDict(json.load(f))

# Grid label and forecast-lead values depend on the variable being evaluated
if vname != 'pr':
    grid, leads = '2x2', np.arange(19)
else:
    grid, leads = '2.5x2.5', np.arange(13)

In [3]:
# Target (reference) anomalies; for 'pr' the file name carries a 1987-2016 suffix
ref_path = (
    f'{param.data_dir}/{vname}_anomaly_{grid}_1987-2016.nc'
    if vname == 'pr'
    else f'{param.data_dir}/{vname}_anomaly_{grid}.nc'
)
ref = xr.open_dataarray(ref_path)

# Library anomalies from which the analog forecasts are built
library_path = f'{param.library_dir}/{vname}_anomaly_{grid}.nc'
library = xr.open_dataarray(library_path)

# No regional subsetting is applied at load time: the full fields are kept.

# MA indices written to the output directory
ma_idx = xr.open_dataarray(f'{out_dir}/ma_index_{test_data}.nc')

# For 'pr', keep only the years 1987-2015 of the index
if vname == 'pr':
    ma_idx = ma_idx.sel(year=slice(1987, 2015))

In [4]:
%%time
# Build the analog forecasts from the library for every requested lead
af = get_af(
    library,
    param.periods.library,
    ma_idx,
    n_analog,
    leads,
)

# Forecast used by the deterministic scores: average over the 'analog' dimension
afm = af.mean(dim='analog')

CPU times: user 16 s, sys: 57 s, total: 1min 13s
Wall time: 1min 57s


In [5]:
%%time
# Skill of the analog forecasts (af: all analogs, afm: analog mean) against ref.

# All-month stats: reduce over both 'year' and 'month'
t_mse = eval_stats_lead(eval_mse, ref, afm, dim=['year', 'month'])
t_uac = eval_stats_lead(eval_uac, ref, afm, dim=['year', 'month'])

# Monthly stats: reduce over 'year' only
t_mse_month = eval_stats_lead(eval_mse, ref, afm, dim='year')
t_uac_month = eval_stats_lead(eval_uac, ref, afm, dim='year')
t_cac_month = eval_stats_lead(eval_r, ref, afm, dim='year')
t_rmsss_month = eval_stats_lead(eval_rmsss, ref, afm, dim='year')
t_msss_month = eval_stats_lead(eval_msss, ref, afm, dim='year')

# Select the target region once and apply it to BOTH the reference and the
# forecasts, so every regional score sees the same grid points.
# NOTE(review): the spatial scores previously received the region-sliced ref
# together with the full-domain afm. Whether the eval_* helpers align their
# inputs internally is not visible from this notebook, so the forecast is now
# subset explicitly, matching what the CRPS call below already did.
region = dict(lat=slice(*lat_slice), lon=slice(*lon_slice))
ref_region = ref.sel(**region)
af_region = af.sel(**region)
afm_region = afm.sel(**region)

# Over the equatorial Pacific: reduce over 'lat' and 'lon'
xy_mse = eval_stats_lead(eval_mse, ref_region, afm_region, dim=['lat', 'lon'])
xy_uac = eval_stats_lead(eval_uac, ref_region, afm_region, dim=['lat', 'lon'])

# Probabilistic stats (uses the individual analogs, not their mean)
print('CRPS')
t_crps_month = eval_stats_lead(eval_crps_decomp, ref_region, af_region, dim='year')

# Ensemble spread: square root of the year-mean variance across analogs
print('Ensemble spread')
t_std_month = af.var(dim='analog').mean(dim='year') ** 0.5
t_std_month = t_std_month.rename('std').assign_attrs(long_name='Ensemble spread')

CRPS
Ensemble spread


  result = getattr(npmodule, name)(values, axis=axis, **kwargs)


CPU times: user 1min 1s, sys: 11.4 s, total: 1min 13s
Wall time: 1min 13s


In [6]:
# Combine the individual score arrays into labelled datasets.

# Descriptive long_name attribute attached to each score variable
long_names = {
    'mse': 'Mean square error',
    'uac': 'Uncentered anomaly correlation',
    'cac': 'Centered anomaly correlation',
    'rmsss': 'Root mean square skill score',
    'msss': 'Mean square skill score',
}


def _merge_scores(**scores):
    """Rename each array to its keyword, attach its long_name, and merge them.

    Arrays are merged in the order the keywords are given.
    """
    labelled = []
    for short_name, score in scores.items():
        labelled.append(
            score.rename(short_name).assign_attrs(long_name=long_names[short_name])
        )
    return xr.merge(labelled)


# All-month scores
t_stats = _merge_scores(mse=t_mse, uac=t_uac)

# Per-month scores
t_stats_month = _merge_scores(
    mse=t_mse_month,
    uac=t_uac_month,
    cac=t_cac_month,
    rmsss=t_rmsss_month,
    msss=t_msss_month,
)

# Spatial scores over the target region
xy_stats = _merge_scores(mse=xy_mse, uac=xy_uac)

In [7]:
# Save every result as netCDF, storing all variables as float32.

# Datasets whose encoding is derived from their own variable names
dataset_outputs = [
    (t_stats, f'{out_dir}/{vname}_t_stats_{test_data}.nc'),
    (t_stats_month, f'{out_dir}/{vname}_t_stats_month_{test_data}.nc'),
    (xy_stats, f'{out_dir}/{vname}_xy_stats_{test_data}.nc'),
    (t_crps_month, f'{out_dir}/{vname}_t_crps_month_{test_data}.nc'),
]
for stats, nc_path in dataset_outputs:
    encoding = {key: {'dtype': 'float32'} for key in stats.keys()}
    stats.to_netcdf(nc_path, encoding=encoding)

# The ensemble spread is a single array named 'std', so its encoding is spelled out
encoding = {'std': {'dtype': 'float32'}}
t_std_month.to_netcdf(f'{out_dir}/{vname}_t_std_month_{test_data}.nc', encoding=encoding)

# Nino indices skills
This section is computed only if `vname == 'sst'`.

In [8]:
%%time
if vname == 'sst':
    # Nino indices of the reference and of the library fields
    nino_ref = nino_indices(ref)
    nino_library = nino_indices(library)

    # Analog forecasts of the indices and their analog mean
    nino_af = get_af(
        nino_library, param.periods.library, ma_idx, n_analog, leads)
    nino_afm = nino_af.mean(dim='analog')

    # Deterministic scores: all-month UAC, then per-month UAC / MSE / RMSSS
    nino_t_uac = eval_stats_lead(
        eval_uac, nino_ref, nino_afm, dim=['year', 'month'])
    nino_t_uac_month = eval_stats_lead(
        eval_uac, nino_ref, nino_afm, dim='year')
    nino_t_mse_month = eval_stats_lead(
        eval_mse, nino_ref, nino_afm, dim='year')
    nino_t_rmsss_month = eval_stats_lead(
        eval_rmsss, nino_ref, nino_afm, dim='year')

    # Probabilistic score (nino34 only) and ensemble spread across analogs
    nino_t_crps_month = eval_stats_lead(
        eval_crps_decomp, nino_ref['nino34'], nino_af['nino34'], dim='year')
    nino_t_std_month = nino_af.var(dim='analog').mean(dim='year') ** 0.5

    # Save: these outputs share one float32 encoding keyed by the variables of nino_ref
    encoding = {key: {'dtype': 'float32'} for key in nino_ref.keys()}
    nino_outputs = [
        (nino_t_uac, f'{out_dir}/nino_t_uac_{test_data}.nc'),
        (nino_t_uac_month, f'{out_dir}/nino_t_uac_month_{test_data}.nc'),
        (nino_t_mse_month, f'{out_dir}/nino_t_mse_month_{test_data}.nc'),
        (nino_t_rmsss_month, f'{out_dir}/nino_t_rmsss_month_{test_data}.nc'),
        (nino_t_std_month, f'{out_dir}/nino_t_std_month_{test_data}.nc'),
    ]
    for nino_stat, nc_path in nino_outputs:
        nino_stat.to_netcdf(nc_path, encoding=encoding)

    # The CRPS result gets an encoding built from its own variables
    encoding = {key: {'dtype': 'float32'} for key in nino_t_crps_month.keys()}
    nino_t_crps_month.to_netcdf(f'{out_dir}/nino34_t_crps_month_{test_data}.nc', encoding=encoding)

CPU times: user 3.37 s, sys: 10.9 s, total: 14.3 s
Wall time: 15 s
