# Table 5 (Molina et al., submitted)

Table 5. ACC among the 11-member ensemble mean CESM2 and ERA5 weather regime 2-m temperature and precipitation (September-March, 1999-2019) across 10-70°N and 150-40°W (land only). MSE also shown in parentheses for comparison. Two-tailed statistical significance computed using a 10,000-member bootstrap and indicated in bold-face at the 95% confidence level and with asterisk at the 99% confidence level.

## Imports

In [1]:
import warnings
import numpy as np
import pandas as pd
import xarray as xr

from sklearn.decomposition import PCA
from scipy import stats
from sklearn.cluster import KMeans
from sklearn import metrics

import som_analysis
import cluster_analysis
import narm_analysis

## functions

In [2]:
def get_cold_indx(ds, mo_init=9, mo_end=2):
    """
    Extract indices for cold season.
    Grabbing Sept thru February init, for Oct thru March predictions.
    """
    dt_array = pd.to_datetime(ds['time'])
    return xr.where((dt_array.month >= mo_init) | (
        dt_array.month <= mo_end), True, False)

## open and preprocess data

In [3]:
# region for clustering
lat0 = 10
lat1 = 70
lon0 = -150
lon1 = -40

# open era5 data and slice
ds_era5 = narm_analysis.era5_z500(lat0=lat0, lat1=lat1, lon0=lon0, lon1=lon1)

# era5 anomalies
ds_era5_anom = narm_analysis.era5_climo_wrs(
    ds_era5, rolling_days=5, variable='clim')

# restructure era5 array for machine learning training (SONDJFM)
ds_era5_anom = ds_era5_anom[get_cold_indx(
    ds_era5_anom, mo_init=10, mo_end=3), ...]

ds_era5_train = ds_era5_anom.stack(
    flat=('lat', 'lon')).transpose('time', 'flat').values

## pca and kmeans with era5

In [4]:
# create pca object
pca_obj = PCA(12, whiten=True)

# fit pca with era5
pca_obj = pca_obj.fit(ds_era5_train)

# transform era5 data with pca
ds_era5_train = pca_obj.transform(ds_era5_train)

print(f'Variance explained: {pca_obj.explained_variance_ratio_ * 100}')
print(
    f'Cumulative sum of variance explained for EOF1 and EOF2: {np.cumsum(pca_obj.explained_variance_ratio_) * 100}'
)

# train kmeans
k_means = KMeans(n_clusters=4,
                 init='k-means++',
                 n_init=10000,
                 max_iter=300,
                 tol=0.0001,
                 verbose=0,
                 random_state=0).fit(ds_era5_train)

print(f'inertia: {k_means.inertia_}')

Variance explained: [25.95315607 17.65410568 11.94871708  9.0784389   7.98100848  6.14181738
  4.32605934  2.61658689  2.22642929  2.17049559  1.49813958  1.22541708]
Cumulative sum of variance explained for EOF1 and EOF2: [25.95315607 43.60726175 55.55597883 64.63441774 72.61542622 78.7572436
 83.08330294 85.69988983 87.92631912 90.09681471 91.59495429 92.82037136]
inertia: 39379.20538473213


## load data with lead time bias corrected anomalies

In [5]:
# era5 data
z500_era5, z500_era5_dt = som_analysis.open_era5_files(
    variable='z500', return_time=True,
    lat0=lat0, lat1=lat1, lon0=lon0, lon1=lon1,
    leadday0=0, leadday1=42, rolldays=5,)

# cesm data
z500_cesm, z500_cesm_dt = som_analysis.open_cesm_files(
    variable='zg_500', return_time=True,
    lat0=lat0, lat1=lat1, lon0=lon0, lon1=lon1,
    leadday0=0, leadday1=42, rolldays=5,)

# restructure arrays
z500_standard_era5 = z500_era5.stack(
    new=('time', 'lead'), flat=('lat', 'lon')).transpose('new', 'flat')

z500_standard_cesm = z500_cesm.stack(
    new=('time', 'lead'), flat=('lat', 'lon')).transpose('new', 'flat')

## composites of the weather types/regimes

In [6]:
# grab cluster indices

z500_era5_tmp_1, z500_era5_tmp_2, z500_era5_tmp_3, z500_era5_tmp_4 = cluster_analysis.composite_clusters_indx(
    z500_standard_era5, k_means, pca_obj, use_pca=True)

z500_cesm_tmp_1, z500_cesm_tmp_2, z500_cesm_tmp_3, z500_cesm_tmp_4 = cluster_analysis.composite_clusters_indx(
    z500_standard_cesm, k_means, pca_obj, use_pca=True)

## precip and temperature anomalies

In [None]:
lat0_tmp = 10
lat1_tmp = 75
lon0_tmp = -165
lon1_tmp = -40

# temperature

# noaa data
t2m_noaa, _ = som_analysis.open_noaa_files(
    variable='temp', return_time=True,
    lat0=lat0_tmp, lat1=lat1_tmp, lon0=lon0_tmp, lon1=lon1_tmp,
    leadday0=0, leadday1=42, rolldays=1,)

mask = xr.where(~np.isnan(t2m_noaa.isel(time=0, lead=0)), 1.0, np.nan)

# era5 data
t2m_era5, _ = som_analysis.open_era5_files(
    variable='temp', return_time=True,
    lat0=lat0_tmp, lat1=lat1_tmp, lon0=lon0_tmp, lon1=lon1_tmp,
    leadday0=0, leadday1=42, rolldays=1,)

t2m_era5 = t2m_era5.where(mask == 1.0)

# cesm data
t2m_cesm, _ = som_analysis.open_cesm_files(
    variable='tas_2m', return_time=True,
    lat0=lat0_tmp, lat1=lat1_tmp, lon0=lon0_tmp, lon1=lon1_tmp,
    leadday0=0, leadday1=42, rolldays=1,)

t2m_cesm = t2m_cesm.where(mask == 1.0)

# precip

# era5 data
pr_era5, _ = som_analysis.open_era5_files(
    variable='tp', return_time=True,
    lat0=lat0_tmp, lat1=lat1_tmp, lon0=lon0_tmp, lon1=lon1_tmp,
    leadday0=0, leadday1=42, rolldays=1,)

pr_era5 = pr_era5.where(mask == 1.0)

# cesm data
pr_cesm, _ = som_analysis.open_cesm_files(
    variable='pr_sfc', return_time=True,
    lat0=lat0_tmp, lat1=lat1_tmp, lon0=lon0_tmp, lon1=lon1_tmp,
    leadday0=0, leadday1=42, rolldays=1,)

pr_cesm = pr_cesm.where(mask == 1.0)

In [None]:
t2m_era5_01 = t2m_era5.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_era5_tmp_1, :, :]
t2m_era5_02 = t2m_era5.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_era5_tmp_2, :, :]
t2m_era5_03 = t2m_era5.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_era5_tmp_3, :, :]
t2m_era5_04 = t2m_era5.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_era5_tmp_4, :, :]

t2m_cesm_01 = t2m_cesm.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_cesm_tmp_1, :, :]
t2m_cesm_02 = t2m_cesm.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_cesm_tmp_2, :, :]
t2m_cesm_03 = t2m_cesm.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_cesm_tmp_3, :, :]
t2m_cesm_04 = t2m_cesm.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_cesm_tmp_4, :, :]


pr_era5_01 = pr_era5.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_era5_tmp_1, :, :]
pr_era5_02 = pr_era5.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_era5_tmp_2, :, :]
pr_era5_03 = pr_era5.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_era5_tmp_3, :, :]
pr_era5_04 = pr_era5.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_era5_tmp_4, :, :]

pr_cesm_01 = pr_cesm.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_cesm_tmp_1, :, :]
pr_cesm_02 = pr_cesm.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_cesm_tmp_2, :, :]
pr_cesm_03 = pr_cesm.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_cesm_tmp_3, :, :]
pr_cesm_04 = pr_cesm.stack(new=('time', 'lead')).transpose(
    'new', 'lat', 'lon')[z500_cesm_tmp_4, :, :]

## temperature anomalies

## weeks 1-2

In [10]:
firstday = 1
seconday = 14

# ---

temp_era5_01 = t2m_era5_01.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_02 = t2m_era5_02.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_03 = t2m_era5_03.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_04 = t2m_era5_04.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

# ---

temp_data = t2m_cesm_01.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_01.values.flatten()[~np.isnan(temp_era5_01.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_01.values.flatten()[~np.isnan(temp_era5_01.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = t2m_cesm_02.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_02.values.flatten()[~np.isnan(temp_era5_02.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_02.values.flatten()[~np.isnan(temp_era5_02.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = t2m_cesm_03.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_03.values.flatten()[~np.isnan(temp_era5_03.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_03.values.flatten()[~np.isnan(temp_era5_03.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = t2m_cesm_04.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_04.values.flatten()[~np.isnan(temp_era5_04.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_04.values.flatten()[~np.isnan(temp_era5_04.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

R=0.98, MSE=0.51
R=0.92, MSE=0.24
R=0.97, MSE=0.19
R=0.92, MSE=0.11


## bootstrap

In [10]:
# ---

firstday = 1
seconday = 14
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_1.shape[0]
tmp_data2 = z500_cesm_tmp_1.shape[0]

# ---

all_era50 = t2m_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = t2m_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_temp_wr1_wk12/table5_boot_{ind + 1}.nc')

In [None]:
# ---

firstday = 1
seconday = 14
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_2.shape[0]
tmp_data2 = z500_cesm_tmp_2.shape[0]

# ---

all_era50 = t2m_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = t2m_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_temp_wr2_wk12/table5_boot_{ind + 1}.nc')

In [None]:
# ---

firstday = 1
seconday = 14
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_3.shape[0]
tmp_data2 = z500_cesm_tmp_3.shape[0]

# ---

all_era50 = t2m_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = t2m_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(7305, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_temp_wr3_wk12/table5_boot_{ind + 1}.nc')

In [None]:
# ---

firstday = 1
seconday = 14
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_4.shape[0]
tmp_data2 = z500_cesm_tmp_4.shape[0]

# ---

all_era50 = t2m_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = t2m_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_temp_wr4_wk12/table5_boot_{ind + 1}.nc')

In [9]:
lev_1 = 0.025
lev_2 = 0.975
lev_3 = 0.005
lev_4 = 0.995

table5_temp_wr1_wk12 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_temp_wr1_wk12/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_temp_wr2_wk12 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_temp_wr2_wk12/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_temp_wr3_wk12 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_temp_wr3_wk12/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_temp_wr4_wk12 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_temp_wr4_wk12/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

## weeks 3-4

In [11]:
firstday = 15
seconday = 28

# ---

temp_era5_01 = t2m_era5_01.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_02 = t2m_era5_02.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_03 = t2m_era5_03.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_04 = t2m_era5_04.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

# ---

temp_data = t2m_cesm_01.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_01.values.flatten()[~np.isnan(temp_era5_01.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_01.values.flatten()[~np.isnan(temp_era5_01.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = t2m_cesm_02.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_02.values.flatten()[~np.isnan(temp_era5_02.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_02.values.flatten()[~np.isnan(temp_era5_02.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = t2m_cesm_03.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_03.values.flatten()[~np.isnan(temp_era5_03.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_03.values.flatten()[~np.isnan(temp_era5_03.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = t2m_cesm_04.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_04.values.flatten()[~np.isnan(temp_era5_04.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 5)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_04.values.flatten()[~np.isnan(temp_era5_04.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

R=0.94, MSE=2.03
R=0.86, MSE=0.58
R=0.9, MSE=0.89
R=0.83816, MSE=0.29


## bootstrap (weeks 3-4)

In [None]:
# ---

firstday = 15
seconday = 28
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_1.shape[0]
tmp_data2 = z500_cesm_tmp_1.shape[0]

# ---

all_era50 = t2m_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = t2m_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_temp_wr1_wk34/table5_boot_{ind + 1}.nc')

In [10]:
# ---

firstday = 15
seconday = 28
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_2.shape[0]
tmp_data2 = z500_cesm_tmp_2.shape[0]

# ---

all_era50 = t2m_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = t2m_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(541, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_temp_wr2_wk34/table5_boot_{ind + 1}.nc')

In [None]:
# ---

firstday = 15
seconday = 28
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_3.shape[0]
tmp_data2 = z500_cesm_tmp_3.shape[0]

# ---

all_era50 = t2m_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = t2m_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(8979, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_temp_wr3_wk34/table5_boot_{ind + 1}.nc')

In [11]:
# ---

firstday = 15
seconday = 28
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_4.shape[0]
tmp_data2 = z500_cesm_tmp_4.shape[0]

# ---

all_era50 = t2m_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = t2m_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_temp_wr4_wk34/table5_boot_{ind + 1}.nc')

In [10]:
lev_1 = 0.025
lev_2 = 0.975
lev_3 = 0.005
lev_4 = 0.995

table5_temp_wr1_wk34 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_temp_wr1_wk34/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_temp_wr2_wk34 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_temp_wr2_wk34/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_temp_wr3_wk34 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_temp_wr3_wk34/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_temp_wr4_wk34 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_temp_wr4_wk34/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

## weeks 5-6

In [17]:
firstday = 29
seconday = 42

# ---

temp_era5_01 = t2m_era5_01.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_02 = t2m_era5_02.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_03 = t2m_era5_03.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_04 = t2m_era5_04.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

# ---

temp_data = t2m_cesm_01.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_01.values.flatten()[~np.isnan(temp_era5_01.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_01.values.flatten()[~np.isnan(temp_era5_01.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = t2m_cesm_02.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_02.values.flatten()[~np.isnan(temp_era5_02.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_02.values.flatten()[~np.isnan(temp_era5_02.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = t2m_cesm_03.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_03.values.flatten()[~np.isnan(temp_era5_03.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_03.values.flatten()[~np.isnan(temp_era5_03.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = t2m_cesm_04.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = stats.pearsonr(
        temp_era5_04.values.flatten()[~np.isnan(temp_era5_04.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0]

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_04.values.flatten()[~np.isnan(temp_era5_04.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

R=0.94, MSE=2.44
R=0.91, MSE=0.88
R=0.94, MSE=1.18
R=0.853418302610909, MSE=0.41


## bootstrap (weeks 5-6)

In [None]:
# ---

firstday = 29
seconday = 42
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_1.shape[0]
tmp_data2 = z500_cesm_tmp_1.shape[0]

# ---

all_era50 = t2m_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = t2m_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_temp_wr1_wk56/table5_boot_{ind + 1}.nc')

In [None]:
# ---

firstday = 29
seconday = 42
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_2.shape[0]
tmp_data2 = z500_cesm_tmp_2.shape[0]

# ---

all_era50 = t2m_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = t2m_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(182, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_temp_wr2_wk56/table5_boot_{ind + 1}.nc')

In [None]:
# ---

firstday = 29
seconday = 42
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_3.shape[0]
tmp_data2 = z500_cesm_tmp_3.shape[0]

# ---

all_era50 = t2m_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = t2m_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_temp_wr3_wk56/table5_boot_{ind + 1}.nc')

In [None]:
# ---

firstday = 29
seconday = 42
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_4.shape[0]
tmp_data2 = z500_cesm_tmp_4.shape[0]

# ---

all_era50 = t2m_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = t2m_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(2006, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_temp_wr4_wk56/table5_boot_{ind + 1}.nc')

In [11]:
lev_1 = 0.025
lev_2 = 0.975
lev_3 = 0.005
lev_4 = 0.995

table5_temp_wr1_wk56 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_temp_wr1_wk56/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_temp_wr2_wk56 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_temp_wr2_wk56/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_temp_wr3_wk56 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_temp_wr3_wk56/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_temp_wr4_wk56 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_temp_wr4_wk56/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

## precipitation anomalies

In [13]:
firstday = 1
seconday = 14

# ---

temp_era5_01 = pr_era5_01.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_02 = pr_era5_02.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_03 = pr_era5_03.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_04 = pr_era5_04.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

# ---

temp_data = pr_cesm_01.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_01.values.flatten()[~np.isnan(temp_era5_01.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_01.values.flatten()[~np.isnan(temp_era5_01.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = pr_cesm_02.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_02.values.flatten()[~np.isnan(temp_era5_02.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_02.values.flatten()[~np.isnan(temp_era5_02.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = pr_cesm_03.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_03.values.flatten()[~np.isnan(temp_era5_03.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_03.values.flatten()[~np.isnan(temp_era5_03.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = pr_cesm_04.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_04.values.flatten()[~np.isnan(temp_era5_04.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_04.values.flatten()[~np.isnan(temp_era5_04.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 5)

print(f'R={r}, MSE={mse}')

# ---

R=0.95, MSE=0.18
R=0.88, MSE=0.06
R=0.92, MSE=0.09
R=0.92, MSE=0.10164


## bootstrap (weeks 1-2)

In [None]:
# ---

firstday = 1
seconday = 14
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_1.shape[0]
tmp_data2 = z500_cesm_tmp_1.shape[0]

# ---

all_era50 = pr_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = pr_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_pr_wr1_wk12/table5_boot_{ind + 1}.nc')

In [None]:
# ---

firstday = 1
seconday = 14
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_2.shape[0]
tmp_data2 = z500_cesm_tmp_2.shape[0]

# ---

all_era50 = pr_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = pr_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(5477, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_pr_wr2_wk12/table5_boot_{ind + 1}.nc')

In [11]:
# ---

firstday = 1
seconday = 14
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_3.shape[0]
tmp_data2 = z500_cesm_tmp_3.shape[0]

# ---

all_era50 = pr_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = pr_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_pr_wr3_wk12/table5_boot_{ind + 1}.nc')

In [10]:
# ---

firstday = 1
seconday = 14
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_4.shape[0]
tmp_data2 = z500_cesm_tmp_4.shape[0]

# ---

all_era50 = pr_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = pr_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(4918, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_pr_wr4_wk12/table5_boot_{ind + 1}.nc')

In [12]:
lev_1 = 0.025
lev_2 = 0.975
lev_3 = 0.005
lev_4 = 0.995

table5_pr_wr1_wk12 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_pr_wr1_wk12/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_pr_wr2_wk12 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_pr_wr2_wk12/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_pr_wr3_wk12 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_pr_wr3_wk12/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_pr_wr4_wk12 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_pr_wr4_wk12/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

## weeks 3-4

In [9]:
firstday = 15
seconday = 28

# ---

temp_era5_01 = pr_era5_01.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_02 = pr_era5_02.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_03 = pr_era5_03.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_04 = pr_era5_04.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

# ---

temp_data = pr_cesm_01.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_01.values.flatten()[~np.isnan(temp_era5_01.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_01.values.flatten()[~np.isnan(temp_era5_01.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = pr_cesm_02.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_02.values.flatten()[~np.isnan(temp_era5_02.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_02.values.flatten()[~np.isnan(temp_era5_02.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = pr_cesm_03.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_03.values.flatten()[~np.isnan(temp_era5_03.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_03.values.flatten()[~np.isnan(temp_era5_03.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = pr_cesm_04.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_04.values.flatten()[~np.isnan(temp_era5_04.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_04.values.flatten()[~np.isnan(temp_era5_04.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

R=0.93, MSE=0.47
R=0.71, MSE=0.14
R=0.92, MSE=0.23
R=0.92, MSE=0.26


In [12]:
# ---

firstday = 15
seconday = 28
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_1.shape[0]
tmp_data2 = z500_cesm_tmp_1.shape[0]

# ---

all_era50 = pr_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = pr_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_pr_wr1_wk34/table5_boot_{ind + 1}.nc')

In [13]:
# ---

firstday = 15
seconday = 28
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_2.shape[0]
tmp_data2 = z500_cesm_tmp_2.shape[0]

# ---

all_era50 = pr_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = pr_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_pr_wr2_wk34/table5_boot_{ind + 1}.nc')

In [None]:
# ---

firstday = 15
seconday = 28
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_3.shape[0]
tmp_data2 = z500_cesm_tmp_3.shape[0]

# ---

all_era50 = pr_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = pr_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(41, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_pr_wr3_wk34/table5_boot_{ind + 1}.nc')

In [None]:
# ---

firstday = 15
seconday = 28
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_4.shape[0]
tmp_data2 = z500_cesm_tmp_4.shape[0]

# ---

all_era50 = pr_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = pr_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(8456, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_pr_wr4_wk34/table5_boot_{ind + 1}.nc')

In [13]:
lev_1 = 0.025
lev_2 = 0.975
lev_3 = 0.005
lev_4 = 0.995

table5_pr_wr1_wk34 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_pr_wr1_wk34/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_pr_wr2_wk34 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_pr_wr2_wk34/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_pr_wr3_wk34 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_pr_wr3_wk34/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_pr_wr4_wk34 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_pr_wr4_wk34/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

## weeks 5-6

In [14]:
firstday = 29
seconday = 42

# ---

temp_era5_01 = pr_era5_01.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_02 = pr_era5_02.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_03 = pr_era5_03.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

temp_era5_04 = pr_era5_04.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

# ---

temp_data = pr_cesm_01.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_01.values.flatten()[~np.isnan(temp_era5_01.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_01.values.flatten()[~np.isnan(temp_era5_01.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = pr_cesm_02.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_02.values.flatten()[~np.isnan(temp_era5_02.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_02.values.flatten()[~np.isnan(temp_era5_02.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = pr_cesm_03.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_03.values.flatten()[~np.isnan(temp_era5_03.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_03.values.flatten()[~np.isnan(temp_era5_03.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

temp_data = pr_cesm_04.unstack('new').isel(
    lead=slice(firstday, seconday)).stack(
    new=('lead', 'time')).mean('new', skipna=True).where(mask == 1.0)

r = np.around(
    stats.pearsonr(
        temp_era5_04.values.flatten()[~np.isnan(temp_era5_04.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())])[0], 2)

mse = np.around(
    metrics.mean_squared_error(
        temp_era5_04.values.flatten()[~np.isnan(temp_era5_04.values.flatten())],
        temp_data.values.flatten()[~np.isnan(temp_data.values.flatten())]), 2)

print(f'R={r}, MSE={mse}')

# ---

R=0.92, MSE=0.5
R=0.74, MSE=0.13
R=0.9, MSE=0.28
R=0.91, MSE=0.32


## bootstrap (weeks 5-6)

In [None]:
# ---

firstday = 29
seconday = 42
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_1.shape[0]
tmp_data2 = z500_cesm_tmp_1.shape[0]

# ---

all_era50 = pr_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = pr_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_pr_wr1_wk56/table5_boot_{ind + 1}.nc')

In [None]:
# ---

firstday = 29
seconday = 42
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_2.shape[0]
tmp_data2 = z500_cesm_tmp_2.shape[0]

# ---

all_era50 = pr_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = pr_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_pr_wr2_wk56/table5_boot_{ind + 1}.nc')

In [None]:
# ---

firstday = 29
seconday = 42
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_3.shape[0]
tmp_data2 = z500_cesm_tmp_3.shape[0]

# ---

all_era50 = pr_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = pr_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(5423, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_pr_wr3_wk56/table5_boot_{ind + 1}.nc')

In [None]:
# ---

firstday = 29
seconday = 42
boot_iter_num = 10000

tmp_data1 = z500_era5_tmp_4.shape[0]
tmp_data2 = z500_cesm_tmp_4.shape[0]

# ---

all_era50 = pr_era5.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values
all_cesm0 = pr_cesm.isel(lead=slice(firstday, seconday)).stack(
    new=('time', 'lead')).transpose('new', 'lat', 'lon').where(
    mask == 1.0).values

# ---

for ind in range(0, boot_iter_num):

    np.random.seed(ind)
    rand_indx1 = [np.random.choice(
        all_era50.shape[0]) for i in range(tmp_data1)]

    np.random.seed(ind + 1)
    rand_indx2 = [np.random.choice(
        all_cesm0.shape[0]) for i in range(tmp_data2)]

    tmp1 = all_era50[rand_indx1, ...]
    tmp2 = all_cesm0[rand_indx2, ...]

    with warnings.catch_warnings():

        warnings.simplefilter('ignore')

        tmp1 = np.nanmean(tmp1, axis=0)
        tmp2 = np.nanmean(tmp2, axis=0)

    tmp1 = tmp1.flatten()[~np.isnan(tmp1.flatten())]
    tmp2 = tmp2.flatten()[~np.isnan(tmp2.flatten())]

    boot_acc = stats.pearsonr(tmp1, tmp2)[0]
    boot_mse = metrics.mean_squared_error(tmp1, tmp2)

    xr.Dataset(
        data_vars=dict(
            iteration_acc=(["iters"], np.array([boot_acc])),
            iteration_mse=(["iters"], np.array([boot_mse]))
        ),
        attrs=dict(description="For bootstrap confidence intervals."),
    ).to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/table5_pr_wr4_wk56/table5_boot_{ind + 1}.nc')

In [14]:
lev_1 = 0.025
lev_2 = 0.975
lev_3 = 0.005
lev_4 = 0.995

table5_pr_wr1_wk56 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_pr_wr1_wk56/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_pr_wr2_wk56 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_pr_wr2_wk56/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_pr_wr3_wk56 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_pr_wr3_wk56/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

table5_pr_wr4_wk56 = xr.open_mfdataset(
    '/glade/scratch/molina/s2s/bootstrap/table5_pr_wr4_wk56/table5_boot_*.nc',
    combine='nested', concat_dim='iters').chunk(
    dict(iters=-1)).quantile([lev_1, lev_2, lev_3, lev_4],
                             dim='iters', skipna=True)

## assemble dataset

In [15]:
ds_table5_pr_acc = xr.Dataset(

    data_vars=dict(

        table5_pr_wr1_wk12=(["quantile"],
                            table5_pr_wr1_wk12["iteration_acc"].data),
        table5_pr_wr2_wk12=(["quantile"],
                            table5_pr_wr2_wk12["iteration_acc"].data),
        table5_pr_wr3_wk12=(["quantile"],
                            table5_pr_wr3_wk12["iteration_acc"].data),
        table5_pr_wr4_wk12=(["quantile"],
                            table5_pr_wr4_wk12["iteration_acc"].data),

        table5_pr_wr1_wk34=(["quantile"],
                            table5_pr_wr1_wk34["iteration_acc"].data),
        table5_pr_wr2_wk34=(["quantile"],
                            table5_pr_wr2_wk34["iteration_acc"].data),
        table5_pr_wr3_wk34=(["quantile"],
                            table5_pr_wr3_wk34["iteration_acc"].data),
        table5_pr_wr4_wk34=(["quantile"],
                            table5_pr_wr4_wk34["iteration_acc"].data),

        table5_pr_wr1_wk56=(["quantile"],
                            table5_pr_wr1_wk56["iteration_acc"].data),
        table5_pr_wr2_wk56=(["quantile"],
                            table5_pr_wr2_wk56["iteration_acc"].data),
        table5_pr_wr3_wk56=(["quantile"],
                            table5_pr_wr3_wk56["iteration_acc"].data),
        table5_pr_wr4_wk56=(["quantile"],
                            table5_pr_wr4_wk56["iteration_acc"].data),

    ),

    coords=dict(
        quantile=(["quantile"], [lev_1, lev_2, lev_3, lev_4]),
        iters=(["iters"], np.arange(0, 1, 1)),
    ),

    attrs=dict(description="For bootstrap confidence intervals."),
)

ds_table5_temp_acc = xr.Dataset(

    data_vars=dict(

        table5_temp_wr1_wk12=(["quantile"],
                              table5_temp_wr1_wk12["iteration_acc"].data),
        table5_temp_wr2_wk12=(["quantile"],
                              table5_temp_wr2_wk12["iteration_acc"].data),
        table5_temp_wr3_wk12=(["quantile"],
                              table5_temp_wr3_wk12["iteration_acc"].data),
        table5_temp_wr4_wk12=(["quantile"],
                              table5_temp_wr4_wk12["iteration_acc"].data),

        table5_temp_wr1_wk34=(["quantile"],
                              table5_temp_wr1_wk34["iteration_acc"].data),
        table5_temp_wr2_wk34=(["quantile"],
                              table5_temp_wr2_wk34["iteration_acc"].data),
        table5_temp_wr3_wk34=(["quantile"],
                              table5_temp_wr3_wk34["iteration_acc"].data),
        table5_temp_wr4_wk34=(["quantile"],
                              table5_temp_wr4_wk34["iteration_acc"].data),

        table5_temp_wr1_wk56=(["quantile"],
                              table5_temp_wr1_wk56["iteration_acc"].data),
        table5_temp_wr2_wk56=(["quantile"],
                              table5_temp_wr2_wk56["iteration_acc"].data),
        table5_temp_wr3_wk56=(["quantile"],
                              table5_temp_wr3_wk56["iteration_acc"].data),
        table5_temp_wr4_wk56=(["quantile"],
                              table5_temp_wr4_wk56["iteration_acc"].data),

    ),

    coords=dict(
        quantile=(["quantile"], [lev_1, lev_2, lev_3, lev_4]),
    ),

    attrs=dict(description="For bootstrap confidence intervals."),
)

In [16]:
ds_table5_pr_mse = xr.Dataset(

    data_vars=dict(

        table5_pr_wr1_wk12=(["quantile"],
                            table5_pr_wr1_wk12["iteration_mse"].data),
        table5_pr_wr2_wk12=(["quantile"],
                            table5_pr_wr2_wk12["iteration_mse"].data),
        table5_pr_wr3_wk12=(["quantile"],
                            table5_pr_wr3_wk12["iteration_mse"].data),
        table5_pr_wr4_wk12=(["quantile"],
                            table5_pr_wr4_wk12["iteration_mse"].data),

        table5_pr_wr1_wk34=(["quantile"],
                            table5_pr_wr1_wk34["iteration_mse"].data),
        table5_pr_wr2_wk34=(["quantile"],
                            table5_pr_wr2_wk34["iteration_mse"].data),
        table5_pr_wr3_wk34=(["quantile"],
                            table5_pr_wr3_wk34["iteration_mse"].data),
        table5_pr_wr4_wk34=(["quantile"],
                            table5_pr_wr4_wk34["iteration_mse"].data),

        table5_pr_wr1_wk56=(["quantile"],
                            table5_pr_wr1_wk56["iteration_mse"].data),
        table5_pr_wr2_wk56=(["quantile"],
                            table5_pr_wr2_wk56["iteration_mse"].data),
        table5_pr_wr3_wk56=(["quantile"],
                            table5_pr_wr3_wk56["iteration_mse"].data),
        table5_pr_wr4_wk56=(["quantile"],
                            table5_pr_wr4_wk56["iteration_mse"].data),

    ),

    coords=dict(
        quantile=(["quantile"], [lev_1, lev_2, lev_3, lev_4]),
        iters=(["iters"], np.arange(0, 1, 1)),
    ),

    attrs=dict(description="For bootstrap confidence intervals."),
)

ds_table5_temp_mse = xr.Dataset(

    data_vars=dict(

        table5_temp_wr1_wk12=(["quantile"],
                              table5_temp_wr1_wk12["iteration_mse"].data),
        table5_temp_wr2_wk12=(["quantile"],
                              table5_temp_wr2_wk12["iteration_mse"].data),
        table5_temp_wr3_wk12=(["quantile"],
                              table5_temp_wr3_wk12["iteration_mse"].data),
        table5_temp_wr4_wk12=(["quantile"],
                              table5_temp_wr4_wk12["iteration_mse"].data),

        table5_temp_wr1_wk34=(["quantile"],
                              table5_temp_wr1_wk34["iteration_mse"].data),
        table5_temp_wr2_wk34=(["quantile"],
                              table5_temp_wr2_wk34["iteration_mse"].data),
        table5_temp_wr3_wk34=(["quantile"],
                              table5_temp_wr3_wk34["iteration_mse"].data),
        table5_temp_wr4_wk34=(["quantile"],
                              table5_temp_wr4_wk34["iteration_mse"].data),

        table5_temp_wr1_wk56=(["quantile"],
                              table5_temp_wr1_wk56["iteration_mse"].data),
        table5_temp_wr2_wk56=(["quantile"],
                              table5_temp_wr2_wk56["iteration_mse"].data),
        table5_temp_wr3_wk56=(["quantile"],
                              table5_temp_wr3_wk56["iteration_mse"].data),
        table5_temp_wr4_wk56=(["quantile"],
                              table5_temp_wr4_wk56["iteration_mse"].data),

    ),

    coords=dict(
        quantile=(["quantile"], [lev_1, lev_2, lev_3, lev_4]),
    ),

    attrs=dict(description="For bootstrap confidence intervals."),
)

In [21]:
ds_table5_temp_acc.to_netcdf(
    '/glade/scratch/molina/s2s/bootstrap/ds_table5_temp_acc.nc')

In [22]:
ds_table5_pr_acc.to_netcdf(
    '/glade/scratch/molina/s2s/bootstrap/ds_table5_pr_acc.nc')

In [23]:
ds_table5_temp_mse.to_netcdf(
    '/glade/scratch/molina/s2s/bootstrap/ds_table5_temp_mse.nc')

In [24]:
ds_table5_pr_mse.to_netcdf(
    '/glade/scratch/molina/s2s/bootstrap/ds_table5_pr_mse.nc')

# Table 5

### temperature

In [16]:
xr.open_dataset(
    '/glade/scratch/molina/s2s/bootstrap/ds_table5_temp_acc.nc')

In [19]:
xr.open_dataset(
    '/glade/scratch/molina/s2s/bootstrap/ds_table5_temp_mse.nc')

In [21]:
xr.open_dataset(
    '/glade/scratch/molina/s2s/bootstrap/ds_table5_pr_acc.nc')

In [22]:
xr.open_dataset(
    '/glade/scratch/molina/s2s/bootstrap/ds_table5_pr_mse.nc')