## Imports

In [1]:
import numpy as np
import pandas as pd
import xarray as xr

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

import som_analysis
import cluster_analysis
import narm_analysis

## functions

In [2]:
def get_cold_indx(ds, mo_init=9, mo_end=2):
    """
    Build a boolean mask selecting the time steps that fall inside a season.

    The season runs from month ``mo_init`` through month ``mo_end``, both
    inclusive. With the defaults (9, 2) this keeps September through
    February, i.e. the initializations used for October through March
    predictions.

    Args:
        ds: object whose ``ds['time']`` entry can be parsed by
            ``pd.to_datetime``.
        mo_init (int): first month of the season (1-12).
        mo_end (int): last month of the season (1-12).

    Returns:
        numpy.ndarray: boolean array with one entry per time step, True where
        the month lies inside the season.
    """
    months = pd.to_datetime(ds['time']).month
    if mo_init > mo_end:
        # season wraps around the new year (e.g. Sep -> Feb)
        in_season = (months >= mo_init) | (months <= mo_end)
    else:
        # season sits inside one calendar year; OR-ing the two bounds here
        # would select every month
        in_season = (months >= mo_init) & (months <= mo_end)
    return np.asarray(in_season, dtype=bool)

## open and preprocess data

In [3]:
# region for clustering (also reused below when loading the z500 lead-time data)
lat0 = 10
lat1 = 70
lon0 = -150
lon1 = -40

# open era5 data and slice
ds_era5 = narm_analysis.era5_z500(lat0=lat0, lat1=lat1, lon0=lon0, lon1=lon1)

# era5 anomalies
ds_era5_anom = narm_analysis.era5_climo_wrs(
    ds_era5, rolling_days=5, variable='clim')

# restructure era5 array for machine learning training
# (mo_init=10 through mo_end=3 keeps October-March, i.e. ONDJFM)
ds_era5_anom = ds_era5_anom[get_cold_indx(
    ds_era5_anom, mo_init=10, mo_end=3), ...]
# 2-D training matrix: rows are time steps, columns are flattened (lat, lon) points
ds_era5_train = ds_era5_anom.stack(
    flat=('lat', 'lon')).transpose('time', 'flat').values

## pca and kmeans with era5

In [4]:
# create pca object (12 whitened components)
pca_obj = PCA(12, whiten=True)

# fit pca with era5
pca_obj = pca_obj.fit(ds_era5_train)

# transform era5 data with pca
# (ds_era5_train is overwritten with the principal components, so this cell
# cannot be re-run without first re-running the preprocessing cell)
ds_era5_train = pca_obj.transform(ds_era5_train)

print(f'Variance explained: {pca_obj.explained_variance_ratio_ * 100}')
# the cumulative sum spans every retained component, so the label must not
# claim it only covers EOF1 and EOF2
print(
    'Cumulative sum of variance explained by the leading EOFs: '
    f'{np.cumsum(pca_obj.explained_variance_ratio_) * 100}'
)

# train kmeans on the principal components (4 clusters, fixed random_state)
k_means = KMeans(n_clusters=4,
                 init='k-means++',
                 n_init=10000,
                 max_iter=300,
                 tol=0.0001,
                 verbose=0,
                 random_state=0).fit(ds_era5_train)

print(f'inertia: {k_means.inertia_}')

Variance explained: [25.95315607 17.65410568 11.94871708  9.0784389   7.98100848  6.14181738
  4.32605934  2.61658689  2.22642929  2.17049559  1.49813958  1.22541708]
Cumulative sum of variance explained for EOF1 and EOF2: [25.95315607 43.60726175 55.55597883 64.63441774 72.61542622 78.7572436
 83.08330294 85.69988983 87.92631912 90.09681471 91.59495429 92.82037136]
inertia: 39379.205423124506


## load data with lead time bias corrected anomalies

In [5]:
# arguments shared by the era5 and cesm loaders (same region, leads, smoothing)
z500_open_kwargs = dict(
    return_time=True,
    lat0=lat0, lat1=lat1, lon0=lon0, lon1=lon1,
    leadday0=0, leadday1=42, rolldays=5,
)

# era5 data
z500_era5, z500_era5_dt = som_analysis.open_era5_files(
    variable='z500', **z500_open_kwargs)

# cesm data
z500_cesm, z500_cesm_dt = som_analysis.open_cesm_files(
    variable='zg_500', **z500_open_kwargs)

# restructure arrays: rows are stacked (time, lead) samples,
# columns are flattened (lat, lon) grid points
z500_standard_era5 = (
    z500_era5
    .stack(new=('time', 'lead'), flat=('lat', 'lon'))
    .transpose('new', 'flat')
)
z500_standard_cesm = (
    z500_cesm
    .stack(new=('time', 'lead'), flat=('lat', 'lon'))
    .transpose('new', 'flat')
)

## composites of the weather types/regimes

In [6]:
# grab cluster indices: one index object per weather regime (1-4),
# obtained with the era5-trained PCA and k-means objects

(z500_era5_tmp_1,
 z500_era5_tmp_2,
 z500_era5_tmp_3,
 z500_era5_tmp_4) = cluster_analysis.composite_clusters_indx(
    z500_standard_era5, k_means, pca_obj, use_pca=True)

(z500_cesm_tmp_1,
 z500_cesm_tmp_2,
 z500_cesm_tmp_3,
 z500_cesm_tmp_4) = cluster_analysis.composite_clusters_indx(
    z500_standard_cesm, k_means, pca_obj, use_pca=True)

## outgoing longwave radiation

In [7]:
# arguments shared by the era5 and cesm loaders (same region, leads, smoothing)
rlut_open_kwargs = dict(
    return_time=True,
    lat0=-50, lat1=50, lon0=20-360, lon1=-20,
    leadday0=0, leadday1=42, rolldays=5,
)

# era5 data (time output discarded)
rlut_era5, _ = som_analysis.open_era5_files(
    variable='ttr', **rlut_open_kwargs)

# cesm data (time output discarded)
rlut_cesm, _ = som_analysis.open_cesm_files(
    variable='rlut', **rlut_open_kwargs)

# restructure data array: rows are stacked (time, lead) samples,
# columns are flattened (lat, lon) grid points
rlut_era5_tmp = (
    rlut_era5
    .stack(new=('time', 'lead'), flat=('lat', 'lon'))
    .transpose('new', 'flat')
)

rlut_cesm_tmp = (
    rlut_cesm
    .stack(new=('time', 'lead'), flat=('lat', 'lon'))
    .transpose('new', 'flat')
)

In [8]:
# extract clusters using indices

# (new, lat, lon) views built once, then indexed along 'new' per regime
rlut_era5_grid = rlut_era5_tmp.unstack('flat').transpose('new', 'lat', 'lon')
rlut_cesm_grid = rlut_cesm_tmp.unstack('flat').transpose('new', 'lat', 'lon')

rlut_era5_tmp_01 = rlut_era5_grid[z500_era5_tmp_1, :, :]
rlut_era5_tmp_02 = rlut_era5_grid[z500_era5_tmp_2, :, :]
rlut_era5_tmp_03 = rlut_era5_grid[z500_era5_tmp_3, :, :]
rlut_era5_tmp_04 = rlut_era5_grid[z500_era5_tmp_4, :, :]

rlut_cesm_tmp_01 = rlut_cesm_grid[z500_cesm_tmp_1, :, :]
rlut_cesm_tmp_02 = rlut_cesm_grid[z500_cesm_tmp_2, :, :]
rlut_cesm_tmp_03 = rlut_cesm_grid[z500_cesm_tmp_3, :, :]
rlut_cesm_tmp_04 = rlut_cesm_grid[z500_cesm_tmp_4, :, :]

## sea surface temperatures

In [9]:
# arguments shared by the era5 and cesm loaders (same region, leads, smoothing)
sstk_open_kwargs = dict(
    return_time=True,
    lat0=-30, lat1=70, lon0=20-360, lon1=-20,
    leadday0=0, leadday1=42, rolldays=5,
)

# era5 data (time output discarded)
sstk_era5, _ = som_analysis.open_era5_files(
    variable='sstk', **sstk_open_kwargs)

# cesm data (time output discarded)
sstk_cesm, _ = som_analysis.open_cesm_files(
    variable='sst', **sstk_open_kwargs)

# restructure data array: rows are stacked (time, lead) samples,
# columns are flattened (lat, lon) grid points
sstk_era5_tmp = (
    sstk_era5
    .stack(new=('time', 'lead'), flat=('lat', 'lon'))
    .transpose('new', 'flat')
)

sstk_cesm_tmp = (
    sstk_cesm
    .stack(new=('time', 'lead'), flat=('lat', 'lon'))
    .transpose('new', 'flat')
)

In [10]:
# extract clusters using indices

# (new, lat, lon) views built once, then indexed along 'new' per regime
sstk_era5_grid = sstk_era5_tmp.unstack('flat').transpose('new', 'lat', 'lon')
sstk_cesm_grid = sstk_cesm_tmp.unstack('flat').transpose('new', 'lat', 'lon')

sstk_era5_tmp_01 = sstk_era5_grid[z500_era5_tmp_1, :, :]
sstk_era5_tmp_02 = sstk_era5_grid[z500_era5_tmp_2, :, :]
sstk_era5_tmp_03 = sstk_era5_grid[z500_era5_tmp_3, :, :]
sstk_era5_tmp_04 = sstk_era5_grid[z500_era5_tmp_4, :, :]

sstk_cesm_tmp_01 = sstk_cesm_grid[z500_cesm_tmp_1, :, :]
sstk_cesm_tmp_02 = sstk_cesm_grid[z500_cesm_tmp_2, :, :]
sstk_cesm_tmp_03 = sstk_cesm_grid[z500_cesm_tmp_3, :, :]
sstk_cesm_tmp_04 = sstk_cesm_grid[z500_cesm_tmp_4, :, :]

## bootstrap

In [11]:
# lead-index window for the composites and bootstraps; later cells use it as
# isel(lead=slice(firstday, seconday)), so the end index is exclusive
firstday = 1
seconday = 14

# grid coordinates of the OLR domain, reused when writing OLR bootstrap files
lons_rlut = rlut_era5_tmp_01.lon.values
lats_rlut = rlut_era5_tmp_01.lat.values

# grid coordinates of the SST domain, reused when writing SST bootstrap files
lons_sstk = sstk_era5_tmp_01.lon.values
lats_sstk = sstk_era5_tmp_01.lat.values

In [12]:
# bootstrap iteration bounds: loops below run over
# range(boot_num_init_, boot_num_iter_), so iterations below boot_num_init_
# are not produced by those loops
boot_num_init_ = 1000
boot_num_iter_ = 10000

## olr era5 bootstrap

In [13]:
# resampling pool: every era5 OLR (time, lead) sample inside the lead window,
# as a plain (new, lat, lon) array
tmp_all = (
    rlut_era5_tmp
    .unstack('new')
    .unstack('flat')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('time', 'lead'))
    .transpose('new', 'lat', 'lon')
    .values
)

In [14]:
# regime-1 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    rlut_era5_tmp_01
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

for ind in range(boot_num_init_, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_rlut), "lat": (["lat"], lats_rlut)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/olr_era5_wr1_week12/olr_era5_boot_{ind + 1}.nc')

In [14]:
# regime-2 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    rlut_era5_tmp_02
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

# restart point is hard-coded: iterations below 4620 are not produced here
for ind in range(4620, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_rlut), "lat": (["lat"], lats_rlut)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/olr_era5_wr2_week12/olr_era5_boot_{ind + 1}.nc')

In [14]:
# regime-3 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    rlut_era5_tmp_03
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

# restart point is hard-coded: iterations below 7937 are not produced here
for ind in range(7937, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_rlut), "lat": (["lat"], lats_rlut)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/olr_era5_wr3_week12/olr_era5_boot_{ind + 1}.nc')

In [None]:
# regime-4 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    rlut_era5_tmp_04
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

# restart point is hard-coded: iterations below 7840 are not produced here
for ind in range(7840, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_rlut), "lat": (["lat"], lats_rlut)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/olr_era5_wr4_week12/olr_era5_boot_{ind + 1}.nc')

## olr cesm bootstrap

In [13]:
# resampling pool: every cesm OLR (time, lead) sample inside the lead window,
# as a plain (new, lat, lon) array
tmp_all = (
    rlut_cesm_tmp
    .unstack('new')
    .unstack('flat')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('time', 'lead'))
    .transpose('new', 'lat', 'lon')
    .values
)

In [16]:
# regime-1 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    rlut_cesm_tmp_01
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

for ind in range(boot_num_init_, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_rlut), "lat": (["lat"], lats_rlut)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/olr_cesm_wr1_week12/olr_cesm_boot_{ind + 1}.nc')

In [None]:
# regime-2 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    rlut_cesm_tmp_02
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

# restart point is hard-coded: iterations below 3884 are not produced here
for ind in range(3884, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_rlut), "lat": (["lat"], lats_rlut)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/olr_cesm_wr2_week12/olr_cesm_boot_{ind + 1}.nc')

In [None]:
# regime-3 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    rlut_cesm_tmp_03
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

# restart point is hard-coded: iterations below 6619 are not produced here
for ind in range(6619, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_rlut), "lat": (["lat"], lats_rlut)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/olr_cesm_wr3_week12/olr_cesm_boot_{ind + 1}.nc')

In [14]:
# regime-4 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    rlut_cesm_tmp_04
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

# restart point is hard-coded: iterations below 8957 are not produced here
for ind in range(8957, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_rlut), "lat": (["lat"], lats_rlut)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/olr_cesm_wr4_week12/olr_cesm_boot_{ind + 1}.nc')

## sst era5 bootstrap

In [13]:
# resampling pool: every era5 SST (time, lead) sample inside the lead window,
# as a plain (new, lat, lon) array
tmp_all = (
    sstk_era5_tmp
    .unstack('new')
    .unstack('flat')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('time', 'lead'))
    .transpose('new', 'lat', 'lon')
    .values
)

In [None]:
# regime-1 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    sstk_era5_tmp_01
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

for ind in range(boot_num_init_, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_sstk), "lat": (["lat"], lats_sstk)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/sst_era5_wr1_week12/sst_era5_boot_{ind + 1}.nc')

  boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)


In [None]:
# regime-2 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    sstk_era5_tmp_02
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

# restart point is hard-coded: iterations below 2946 are not produced here
for ind in range(2946, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_sstk), "lat": (["lat"], lats_sstk)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/sst_era5_wr2_week12/sst_era5_boot_{ind + 1}.nc')

  boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)


In [None]:
# regime-3 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    sstk_era5_tmp_03
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

# restart point is hard-coded: iterations below 5547 are not produced here
for ind in range(5547, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_sstk), "lat": (["lat"], lats_sstk)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/sst_era5_wr3_week12/sst_era5_boot_{ind + 1}.nc')

  boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)


In [14]:
# regime-4 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    sstk_era5_tmp_04
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

# restart point is hard-coded: iterations below 8064 are not produced here
for ind in range(8064, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_sstk), "lat": (["lat"], lats_sstk)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/sst_era5_wr4_week12/sst_era5_boot_{ind + 1}.nc')

  boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)


## sst cesm bootstrap

In [13]:
# resampling pool: every cesm SST (time, lead) sample inside the lead window,
# as a plain (new, lat, lon) array
tmp_all = (
    sstk_cesm_tmp
    .unstack('new')
    .unstack('flat')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('time', 'lead'))
    .transpose('new', 'lat', 'lon')
    .values
)

In [16]:
# regime-1 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    sstk_cesm_tmp_01
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

for ind in range(boot_num_init_, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_sstk), "lat": (["lat"], lats_sstk)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/sst_cesm_wr1_week12/sst_cesm_boot_{ind + 1}.nc')

In [14]:
# regime-2 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    sstk_cesm_tmp_02
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

# restart point is hard-coded: iterations below 3844 are not produced here
for ind in range(3844, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_sstk), "lat": (["lat"], lats_sstk)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/sst_cesm_wr2_week12/sst_cesm_boot_{ind + 1}.nc')

In [None]:
# regime-3 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    sstk_cesm_tmp_03
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

# restart point is hard-coded: iterations below 7662 are not produced here
for ind in range(7662, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_sstk), "lat": (["lat"], lats_sstk)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/sst_cesm_wr3_week12/sst_cesm_boot_{ind + 1}.nc')

In [None]:
# regime-4 samples inside the lead window; only the length of this array is
# used below (it sets how many draws each resample takes)
# NOTE(review): unstack('new') presumably NaN-fills (time, lead) pairs absent
# from the regime, so this length may exceed the member count -- confirm
tmp_data = (
    sstk_cesm_tmp_04
    .unstack('new')
    .isel(lead=slice(firstday, seconday))
    .stack(new=('lead', 'time'))
    .transpose('new', 'lat', 'lon')
    .values
)

for ind in range(boot_num_init_, boot_num_iter_):

    # reseed from the iteration number so the draw for a given file is repeatable
    np.random.seed(ind + 1)
    rand_indx = [
        np.random.choice(tmp_all.shape[0])
        for _ in range(tmp_data.shape[0])
    ]
    boot_ = np.nanmean(tmp_all[rand_indx, ...], axis=0)

    # one netCDF file per iteration, holding the resampled mean field
    boot_ds = xr.Dataset(
        data_vars={"iteration": (["lat", "lon"], boot_)},
        coords={"lon": (["lon"], lons_sstk), "lat": (["lat"], lats_sstk)},
        attrs={"description": "For bootstrap confidence intervals."},
    )
    boot_ds.to_netcdf(
        f'/glade/scratch/molina/s2s/bootstrap/sst_cesm_wr4_week12/sst_cesm_boot_{ind + 1}.nc')

## compute bootstrap percentiles (olr)

In [13]:
# quantile levels taken across the bootstrap iterations
lev_1 = 0.025
lev_2 = 0.975
lev_3 = 0.005
lev_4 = 0.995


def _olr_boot_quantiles(file_glob):
    """Open every bootstrap file matching ``file_glob``, concatenate the files
    along a new 'iter' dimension and reduce that dimension to the four
    quantile levels."""
    boot_members = xr.open_mfdataset(
        file_glob, combine='nested', concat_dim='iter')
    # rechunk so 'iter' is one chunk before the quantile reduction
    boot_members = boot_members.chunk(dict(iter=-1))
    return boot_members.quantile(
        [lev_1, lev_2, lev_3, lev_4], dim='iter', skipna=True)


# era5

tmp_era5_wr1 = _olr_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/olr_era5_wr1_week12/olr_era5_boot_*.nc')

tmp_era5_wr2 = _olr_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/olr_era5_wr2_week12/olr_era5_boot_*.nc')

tmp_era5_wr3 = _olr_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/olr_era5_wr3_week12/olr_era5_boot_*.nc')

tmp_era5_wr4 = _olr_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/olr_era5_wr4_week12/olr_era5_boot_*.nc')

# cesm

tmp_cesm_wr1 = _olr_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/olr_cesm_wr1_week12/olr_cesm_boot_*.nc')

tmp_cesm_wr2 = _olr_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/olr_cesm_wr2_week12/olr_cesm_boot_*.nc')

tmp_cesm_wr3 = _olr_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/olr_cesm_wr3_week12/olr_cesm_boot_*.nc')

tmp_cesm_wr4 = _olr_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/olr_cesm_wr4_week12/olr_cesm_boot_*.nc')

## assemble dataset

In [14]:
# regime members and bootstrap quantiles per data source, ordered wr1..wr4
olr_members = {
    'era5': (rlut_era5_tmp_01, rlut_era5_tmp_02,
             rlut_era5_tmp_03, rlut_era5_tmp_04),
    'cesm': (rlut_cesm_tmp_01, rlut_cesm_tmp_02,
             rlut_cesm_tmp_03, rlut_cesm_tmp_04),
}
olr_quantiles = {
    'era5': (tmp_era5_wr1, tmp_era5_wr2, tmp_era5_wr3, tmp_era5_wr4),
    'cesm': (tmp_cesm_wr1, tmp_cesm_wr2, tmp_cesm_wr3, tmp_cesm_wr4),
}
# variable-name suffix paired with the quantile level it stores
olr_levels = (('025', 0.025), ('975', 0.975), ('005', 0.005), ('995', 0.995))

# filled in the same order the variables are meant to appear in the file:
# per source, the four composites first, then four bounds for each regime
olr_vars = {}
for source in ('era5', 'cesm'):

    # composite mean over every (lead, time) sample inside the lead window
    for regime, members in enumerate(olr_members[source], start=1):
        windowed = members.unstack('new').isel(lead=slice(firstday, seconday))
        composite = windowed.stack(new=('lead', 'time')).mean('new', skipna=True)
        olr_vars[f'wr{regime}_{source}'] = (["lat", "lon"], composite.values)

    # bootstrap bounds at each stored quantile level
    for regime, bounds in enumerate(olr_quantiles[source], start=1):
        for suffix, level in olr_levels:
            bound = bounds.sel(quantile=level)['iteration'].transpose('lat', 'lon')
            olr_vars[f'wr{regime}_{source}_{suffix}'] = (["lat", "lon"], bound.values)

ds_olr = xr.Dataset(
    data_vars=olr_vars,
    coords={"lon": (["lon"], lons_rlut), "lat": (["lat"], lats_rlut)},
    attrs={"description": "Figure data for weather regimes research."},
)

## save file

In [15]:
# write the assembled OLR composites and bootstrap bounds to a single netCDF file
ds_olr.to_netcdf(
    '/glade/scratch/molina/s2s/bootstrap/rlut_week12_wxregimes.nc')

## compute bootstrap percentiles (sst)

In [16]:
# quantile levels taken across the bootstrap iterations
lev_1 = 0.025
lev_2 = 0.975
lev_3 = 0.005
lev_4 = 0.995


def _sst_boot_quantiles(file_glob):
    """Open every bootstrap file matching ``file_glob``, concatenate the files
    along a new 'iter' dimension and reduce that dimension to the four
    quantile levels."""
    boot_members = xr.open_mfdataset(
        file_glob, combine='nested', concat_dim='iter')
    # rechunk so 'iter' is one chunk before the quantile reduction
    boot_members = boot_members.chunk(dict(iter=-1))
    return boot_members.quantile(
        [lev_1, lev_2, lev_3, lev_4], dim='iter', skipna=True)


# NOTE: the tmp_era5_wr*/tmp_cesm_wr* names assigned here now refer to SST
# results; any earlier values bound to the same names are replaced

# era5

tmp_era5_wr1 = _sst_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/sst_era5_wr1_week12/sst_era5_boot_*.nc')

tmp_era5_wr2 = _sst_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/sst_era5_wr2_week12/sst_era5_boot_*.nc')

tmp_era5_wr3 = _sst_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/sst_era5_wr3_week12/sst_era5_boot_*.nc')

tmp_era5_wr4 = _sst_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/sst_era5_wr4_week12/sst_era5_boot_*.nc')

# cesm

tmp_cesm_wr1 = _sst_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/sst_cesm_wr1_week12/sst_cesm_boot_*.nc')

tmp_cesm_wr2 = _sst_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/sst_cesm_wr2_week12/sst_cesm_boot_*.nc')

tmp_cesm_wr3 = _sst_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/sst_cesm_wr3_week12/sst_cesm_boot_*.nc')

tmp_cesm_wr4 = _sst_boot_quantiles(
    '/glade/scratch/molina/s2s/bootstrap/sst_cesm_wr4_week12/sst_cesm_boot_*.nc')

## assemble dataset

In [17]:
# Dimension order shared by every 2-D field stored in ds_sst.
_FIELD_DIMS = ["lat", "lon"]

# (variable-name suffix, quantile level) pairs, listed in the order the
# corresponding variables are inserted into the dataset.
_QUANTILE_BOUNDS = (('025', 0.025), ('975', 0.975), ('005', 0.005), ('995', 0.995))


def _lead_window_mean(composite):
    """
    Mean of a composite over the selected lead window and all times.

    The stacked 'new' dimension is unstacked, leads are restricted to
    ``slice(firstday, seconday)``, and ('lead', 'time') are re-stacked and
    averaged (NaNs skipped). Returns the result as a numpy array.
    """
    windowed = composite.unstack('new').isel(lead=slice(firstday, seconday))
    return windowed.stack(new=('lead', 'time')).mean('new', skipna=True).values


def _quantile_fields(quantile_ds):
    """
    Return ``[(suffix, (lat, lon) array), ...]`` for each level in _QUANTILE_BOUNDS.

    The 'iteration' variable is computed once up front so that a lazily
    evaluated (dask-backed) quantile dataset is not re-evaluated for every
    level that is selected from it.
    """
    computed = quantile_ds[['iteration']].compute()
    return [
        (suffix,
         computed.sel(quantile=level)['iteration'].transpose('lat', 'lon').values)
        for suffix, level in _QUANTILE_BOUNDS
    ]


def _regime_variables(source, composites, quantile_sets):
    """
    Build the ``data_vars`` entries for one data source ('era5' or 'cesm').

    Produces ``wr<N>_<source>`` for every composite first, followed by
    ``wr<N>_<source>_<suffix>`` for every quantile dataset, with N counted
    from 1 in the order given.
    """
    entries = {}
    for regime, composite in enumerate(composites, start=1):
        entries[f'wr{regime}_{source}'] = (
            _FIELD_DIMS, _lead_window_mean(composite))
    for regime, quantile_ds in enumerate(quantile_sets, start=1):
        for suffix, field in _quantile_fields(quantile_ds):
            entries[f'wr{regime}_{source}_{suffix}'] = (_FIELD_DIMS, field)
    return entries


ds_sst = xr.Dataset(

    data_vars={
        **_regime_variables(
            'era5',
            (sstk_era5_tmp_01, sstk_era5_tmp_02,
             sstk_era5_tmp_03, sstk_era5_tmp_04),
            (tmp_era5_wr1, tmp_era5_wr2, tmp_era5_wr3, tmp_era5_wr4),
        ),
        **_regime_variables(
            'cesm',
            (sstk_cesm_tmp_01, sstk_cesm_tmp_02,
             sstk_cesm_tmp_03, sstk_cesm_tmp_04),
            (tmp_cesm_wr1, tmp_cesm_wr2, tmp_cesm_wr3, tmp_cesm_wr4),
        ),
    },

    coords=dict(
        lon=(["lon"], lons_sstk),
        lat=(["lat"], lats_sstk),
    ),

    attrs=dict(description="Figure data for weather regimes research."),
)

  result = np.apply_along_axis(_nanquantile_1d, axis, a, q,


## save file

In [18]:
# Write the assembled figure dataset to a netCDF file.
ds_sst.to_netcdf('/glade/scratch/molina/s2s/bootstrap/sstk_week12_wxregimes.nc')