## Imports

In [1]:
import warnings
from itertools import product
import glob
from datetime import datetime
from datetime import timedelta
import numpy as np
import pandas as pd
import xarray as xr

import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.path as mpath
import cartopy
import cartopy.crs as ccrs
import cartopy.feature
import cartopy.feature as cfeature
import cartopy.io.shapereader as shpreader
import cartopy.feature as cf
import shapely.geometry as sgeom

from sklearn.decomposition import PCA
from scipy import stats
from sklearn.cluster import KMeans
from sklearn import metrics
from scipy.spatial.distance import cdist
import xskillscore as xs

import som_analysis
import cluster_analysis
import narm_analysis

## functions

In [2]:
def add_insetmap(axes_extent, map_extent, lons, lats, temp_data,
                 vmin, vmax, cmap='coolwarm'):
    """
    Add an inset map to the current figure and shade ``temp_data`` on it.

    Args:
        axes_extent: Rectangle handed to ``plt.axes`` to place the inset.
        map_extent: Map bounds handed to ``set_extent``; elements 0/1 and 2/3
            are also used as the x- and y-limits of the outline box.
        lons, lats: Coordinates handed to ``pcolormesh`` (PlateCarree transform).
        temp_data: Field handed to ``pcolormesh``.
        vmin, vmax: Color limits for ``pcolormesh``.
        cmap: Colormap for ``pcolormesh``. Defaults to 'coolwarm'.

    Returns:
        None. The inset axes is created as a side effect and not returned.
    """
    # Mercator inset; the extent is interpreted on a WGS84 geodetic CRS
    inset_ax = plt.axes(axes_extent, projection=ccrs.Mercator())
    inset_ax.set_extent(map_extent, ccrs.Geodetic(globe=ccrs.Globe(datum='WGS84')))
    inset_ax.coastlines(linewidth=0.35, zorder=10)

    inset_ax.pcolormesh(
        lons, lats, temp_data,
        transform=ccrs.PlateCarree(),
        vmin=vmin, vmax=vmax, cmap=cmap,
    )

    # outline of the requested extent: box(minx, miny, maxx, maxy)
    x_min, x_max = map_extent[0], map_extent[1]
    y_min, y_max = map_extent[2], map_extent[3]
    outline = sgeom.box(x_min, y_min, x_max, y_max)
    inset_ax.add_geometries([outline], ccrs.PlateCarree(), color='none', linewidth=0.05)

In [3]:
def get_cold_indx(ds, mo_init=9, mo_end=2):
    """
    Extract indices for cold season.
    Grabbing Sept thru February init, for Oct thru March predictions.

    Args:
        ds: Object whose ``ds['time']`` can be converted by ``pd.to_datetime``.
        mo_init (int): First month of the season (1-12). Defaults to 9.
        mo_end (int): Last month of the season (1-12). Defaults to 2.

    Returns:
        numpy boolean array, one entry per time step, True where the month
        falls inside the season.
    """
    months = np.asarray(pd.to_datetime(ds['time']).month)
    if mo_init <= mo_end:
        # season contained in one calendar year (e.g. 4..7): both bounds must hold
        return (months >= mo_init) & (months <= mo_end)
    # season wraps the year end (e.g. 9..2): either bound is enough
    return (months >= mo_init) | (months <= mo_end)

## open and preprocess training data

In [4]:
# bounding box used for clustering
lat0, lat1, lon0, lon1 = 10, 70, -150, -40

# load era5 z500 restricted to the bounding box
ds_era5 = narm_analysis.era5_z500(lat0=lat0, lat1=lat1, lon0=lon0, lon1=lon1)

# era5 anomalies (5-day rolling setting passed to the helper)
ds_era5_anom = narm_analysis.era5_climo_wrs(ds_era5, rolling_days=5, variable='clim')

# keep only time steps with month >= 10 or month <= 3 (ONDJFM), then flatten
# the grid so each row is one time step for machine learning training
ds_era5_anom = ds_era5_anom[get_cold_indx(ds_era5_anom, mo_init=10, mo_end=3), ...]
ds_era5_train = (
    ds_era5_anom
    .stack(flat=('lat', 'lon'))
    .transpose('time', 'flat')
    .values
)

## pca and kmeans with era5

In [5]:
# build a 12-component whitened pca and fit it on the era5 training matrix
pca_obj = PCA(12, whiten=True).fit(ds_era5_train)

# project the era5 training matrix onto the fitted components
# NOTE: ds_era5_train is overwritten with the transformed data, so re-running
# this cell without re-running the preprocessing cell would fit on that output.
ds_era5_train = pca_obj.transform(ds_era5_train)

print(f'Variance explained: {pca_obj.explained_variance_ratio_ * 100}')
# NOTE: despite the label, the cumulative sum below covers every retained component
print(
f'Cumulative sum of variance explained for EOF1 and EOF2: {np.cumsum(pca_obj.explained_variance_ratio_) * 100}'
)

# four-cluster kmeans on the transformed training data
k_means = KMeans(
    n_clusters=4,
    init='k-means++',
    n_init=10000,
    max_iter=300,
    tol=0.0001,
    verbose=0,
    random_state=0,
)
k_means = k_means.fit(ds_era5_train)

print(f'inertia: {k_means.inertia_}')

Variance explained: [25.95315607 17.65410568 11.94871708  9.0784389   7.98100848  6.14181738
  4.32605934  2.61658689  2.22642929  2.17049559  1.49813958  1.22541708]
Cumulative sum of variance explained for EOF1 and EOF2: [25.95315607 43.60726175 55.55597883 64.63441774 72.61542622 78.7572436
 83.08330294 85.69988983 87.92631912 90.09681471 91.59495429 92.82037136]
inertia: 39379.20536675407


## extract WR indices for use later for bootstrap (significance)

In [6]:
# era5 cluster indices, kept for the bootstrap further down
(z500_era5_boot_1,
 z500_era5_boot_2,
 z500_era5_boot_3,
 z500_era5_boot_4) = cluster_analysis.composite_clusters_indx(
    ds_era5_anom.stack(flat=('lat', 'lon')).transpose('time', 'flat'),
    k_means,
    pca_obj,
    use_pca=True,
)

In [7]:
# cesm cluster indices, kept for the bootstrap further down

# open cesm over the clustering region and convert to anomalies
ds_cesm_anom = narm_analysis.cesm_climo_wrs(
    narm_analysis.open_cesm_climo_wrs(lat0=10, lat1=70, lon0=-150, lon1=-40),
    rolling_days=5,
    variable='zg_500',
)

# keep only time steps with month >= 10 or month <= 3
ds_cesm_anom = ds_cesm_anom[get_cold_indx(ds_cesm_anom, mo_init=10, mo_end=3), ...]

# select lead=slice(0, 6), then flatten (time, lead) into rows and (lat, lon) into columns
ds_cesm_anom = (
    ds_cesm_anom
    .sel(lead=slice(0, 6))
    .stack(new=('time', 'lead'), flat=('lat', 'lon'))
    .transpose('new', 'flat')
)

(z500_cesm_boot_1,
 z500_cesm_boot_2,
 z500_cesm_boot_3,
 z500_cesm_boot_4) = cluster_analysis.composite_clusters_indx(
    ds_cesm_anom, k_means, pca_obj, use_pca=True)

## load hindcast cesm data with lead time bias corrected anomalies (and era5 similarly arranged)

In [6]:
# same bounding box as used for clustering
lat0, lat1, lon0, lon1 = 10, 70, -150, -40

# era5 fields and their time information
z500_era5, z500_era5_dt = som_analysis.open_era5_files(
    variable='z500',
    return_time=True,
    lat0=lat0, lat1=lat1, lon0=lon0, lon1=lon1,
    leadday0=0, leadday1=42, rolldays=5,
)

# rows = (time, lead) samples, columns = flattened (lat, lon) grid
z500_standard_era5 = (
    z500_era5
    .stack(new=('time', 'lead'), flat=('lat', 'lon'))
    .transpose('new', 'flat')
)

# cesm fields and their time information
z500_cesm, z500_cesm_dt = som_analysis.open_cesm_files(
    variable='zg_500',
    return_time=True,
    lat0=lat0, lat1=lat1, lon0=lon0, lon1=lon1,
    leadday0=0, leadday1=42, rolldays=5,
)

# rows = (time, lead) samples, columns = flattened (lat, lon) grid
z500_standard_cesm = (
    z500_cesm
    .stack(new=('time', 'lead'), flat=('lat', 'lon'))
    .transpose('new', 'flat')
)

## extract weather regime indices

In [7]:
# cluster indices for the hindcast-arranged era5 and cesm samples,
# using the kmeans and pca objects fitted on era5 above

(z500_era5_tmp_1,
 z500_era5_tmp_2,
 z500_era5_tmp_3,
 z500_era5_tmp_4) = cluster_analysis.composite_clusters_indx(
    z500_standard_era5, k_means, pca_obj, use_pca=True)

(z500_cesm_tmp_1,
 z500_cesm_tmp_2,
 z500_cesm_tmp_3,
 z500_cesm_tmp_4) = cluster_analysis.composite_clusters_indx(
    z500_standard_cesm, k_means, pca_obj, use_pca=True)

## open data across northern hemisphere for figures

In [8]:
# era5 over lat 10..90, lon -360..0 (time information discarded)
z500_era5_tmp, _ = som_analysis.open_era5_files(
    variable='z500',
    return_time=True,
    lat0=10, lat1=90, lon0=-360, lon1=0,
    leadday0=0, leadday1=42, rolldays=5,
)

# cesm over the same box (time information discarded)
z500_cesm_tmp, _ = som_analysis.open_cesm_files(
    variable='zg_500',
    return_time=True,
    lat0=10, lat1=90, lon0=-360, lon1=0,
    leadday0=0, leadday1=42, rolldays=5,
)

# rows = (time, lead) samples, columns = flattened (lat, lon) grid
z500_standard_era5_tmp = (
    z500_era5_tmp
    .stack(new=('time', 'lead'), flat=('lat', 'lon'))
    .transpose('new', 'flat')
)

z500_standard_cesm_tmp = (
    z500_cesm_tmp
    .stack(new=('time', 'lead'), flat=('lat', 'lon'))
    .transpose('new', 'flat')
)

## extract weather regimes from northern hemisphere data

In [9]:
# extract clusters using indices

def _select_regime_members(stacked, regime_indices):
    """
    Split a stacked ('new', 'flat') array into one ('new', 'lat', 'lon') array per regime.

    The unstack/transpose is done once and reused for every regime instead of
    being repeated for each selection.

    Args:
        stacked: Array with a 'flat' dimension stacked from ('lat', 'lon').
        regime_indices: Sequence of indexers along 'new', one per regime.

    Returns:
        Tuple with one selected array per entry of ``regime_indices``, same order.
    """
    maps = stacked.unstack('flat').transpose('new', 'lat', 'lon')
    return tuple(maps[indx, :, :] for indx in regime_indices)


z500_era5_tmp_01, z500_era5_tmp_02, z500_era5_tmp_03, z500_era5_tmp_04 = _select_regime_members(
    z500_standard_era5_tmp,
    (z500_era5_tmp_1, z500_era5_tmp_2, z500_era5_tmp_3, z500_era5_tmp_4))

z500_cesm_tmp_01, z500_cesm_tmp_02, z500_cesm_tmp_03, z500_cesm_tmp_04 = _select_regime_members(
    z500_standard_cesm_tmp,
    (z500_cesm_tmp_1, z500_cesm_tmp_2, z500_cesm_tmp_3, z500_cesm_tmp_4))

## bootstrap for era5 confidence intervals

In [12]:
# era5 anomalies over lat 10..90, lon -360..0, used as the bootstrap sampling pool

ds_era5_forboot = narm_analysis.era5_climo_wrs(
    narm_analysis.era5_z500(lat0=10, lat1=90, lon0=-360, lon1=0),
    rolling_days=5,
    variable='clim',
)
# keep only time steps with month >= 10 or month <= 3
ds_era5_forboot = ds_era5_forboot[get_cold_indx(ds_era5_forboot, mo_init=10, mo_end=3), ...]

# keep the coordinates before dropping down to a plain numpy array
ds_era5_forboot_lon = ds_era5_forboot.lon.values
ds_era5_forboot_lat = ds_era5_forboot.lat.values

ds_era5_forboot = ds_era5_forboot.values

# placeholder with the shape of axes 1 and 2 of the pool
boot_ = np.zeros(ds_era5_forboot.shape[1:3])

In [13]:
# Bootstrap composites for the four era5 weather regimes.
# Each iteration draws as many random time steps from the pool as the regime
# has members, averages them, and writes the mean field to its own netCDF file.
# The same seeds (1..10000) are used for every regime, and the draws are made
# one index at a time, exactly as before, so the written files are unchanged.
era5_boot_members = {
    1: z500_era5_boot_1.shape[0],
    2: z500_era5_boot_2.shape[0],
    3: z500_era5_boot_3.shape[0],
    4: z500_era5_boot_4.shape[0],
}

# size of the sampling pool (loop-invariant)
n_pool = ds_era5_forboot.shape[0]

for wr_num, n_members in era5_boot_members.items():

    for ind in range(0, 10000):

        np.random.seed(ind + 1)
        rand_indx = [np.random.choice(n_pool) for i in range(n_members)]
        boot_ = np.nanmean(ds_era5_forboot[rand_indx, ...], axis=0)

        xr.Dataset(
            data_vars=dict(
                iteration=(["lat", "lon"], boot_),
            ),
            coords=dict(
                lon=(["lon"], ds_era5_forboot_lon),
                lat=(["lat"], ds_era5_forboot_lat),
            ),
            attrs=dict(description="For bootstrap confidence intervals."),
        ).to_netcdf(
            f'/glade/scratch/molina/s2s/bootstrap/era5_wr{wr_num}/z500_era5_boot_{ind + 1}.nc')

## bootstrap for cesm confidence intervals

In [17]:
# cesm anomalies over lat 10..90, lon -360..0, used as the bootstrap sampling pool

ds_cesm_forboot = narm_analysis.cesm_climo_wrs(
    narm_analysis.open_cesm_climo_wrs(lat0=10, lat1=90, lon0=-360, lon1=0),
    rolling_days=5,
    variable='zg_500',
)
# keep only time steps with month >= 10 or month <= 3
ds_cesm_forboot = ds_cesm_forboot[get_cold_indx(ds_cesm_forboot, mo_init=10, mo_end=3), ...]

# keep the coordinates before dropping down to a plain numpy array
ds_cesm_forboot_lon = ds_cesm_forboot.lon.values
ds_cesm_forboot_lat = ds_cesm_forboot.lat.values

# select lead=slice(0, 6) and merge (time, lead) into one sample axis
ds_cesm_forboot = (
    ds_cesm_forboot
    .sel(lead=slice(0, 6))
    .stack(new=('time', 'lead'))
    .transpose('new', 'lat', 'lon')
    .values
)

# placeholder with the shape of axes 1 and 2 of the pool
boot_ = np.zeros(ds_cesm_forboot.shape[1:3])

In [18]:
# Bootstrap composites for the four cesm weather regimes.
# Each iteration draws as many random samples from the pool as the regime
# has members, averages them, and writes the mean field to its own netCDF file.
# The same seeds (1..10000) are used for every regime, and the draws are made
# one index at a time, exactly as before, so the written files are unchanged.
cesm_boot_members = {
    1: z500_cesm_boot_1.shape[0],
    2: z500_cesm_boot_2.shape[0],
    3: z500_cesm_boot_3.shape[0],
    4: z500_cesm_boot_4.shape[0],
}

# size of the sampling pool (loop-invariant)
n_pool = ds_cesm_forboot.shape[0]

for wr_num, n_members in cesm_boot_members.items():

    for ind in range(0, 10000):

        np.random.seed(ind + 1)
        rand_indx = [np.random.choice(n_pool) for i in range(n_members)]
        boot_ = np.nanmean(ds_cesm_forboot[rand_indx, ...], axis=0)

        xr.Dataset(
            data_vars=dict(
                iteration=(["lat", "lon"], boot_),
            ),
            coords=dict(
                lon=(["lon"], ds_cesm_forboot_lon),
                lat=(["lat"], ds_cesm_forboot_lat),
            ),
            attrs=dict(description="For bootstrap confidence intervals."),
        ).to_netcdf(
            f'/glade/scratch/molina/s2s/bootstrap/cesm_wr{wr_num}/z500_cesm_boot_{ind + 1}.nc')

## compute confidence intervals from bootstrap

In [10]:
# quantile levels: 0.025/0.975 bound the central 95% of the bootstrap
# distribution, 0.005/0.995 bound the central 99%
lev_1 = 0.025
lev_2 = 0.975
lev_3 = 0.005
lev_4 = 0.995

# For each era5 regime: open all bootstrap files along a new 'iter' dimension,
# put 'iter' in a single chunk, and take the quantiles across iterations.
tmp_era5_wr1, tmp_era5_wr2, tmp_era5_wr3, tmp_era5_wr4 = (
    xr.open_mfdataset(
        f'/glade/scratch/molina/s2s/bootstrap/era5_wr{wr_num}/z500_era5_boot_*.nc',
        combine='nested', concat_dim='iter',
    ).chunk(
        dict(iter=-1)
    ).quantile([lev_1, lev_2, lev_3, lev_4], dim='iter', skipna=True)
    for wr_num in (1, 2, 3, 4)
)

In [11]:
# For each cesm regime: open all bootstrap files along a new 'iter' dimension,
# put 'iter' in a single chunk, and take the quantiles across iterations.
# lev_1..lev_4 are the quantile levels defined in the era5 quantile cell.
tmp_cesm_wr1, tmp_cesm_wr2, tmp_cesm_wr3, tmp_cesm_wr4 = (
    xr.open_mfdataset(
        f'/glade/scratch/molina/s2s/bootstrap/cesm_wr{wr_num}/z500_cesm_boot_*.nc',
        combine='nested', concat_dim='iter',
    ).chunk(
        dict(iter=-1)
    ).quantile([lev_1, lev_2, lev_3, lev_4], dim='iter', skipna=True)
    for wr_num in (1, 2, 3, 4)
)

## save data for figures 1 and 2

In [56]:
def _build_wxregime_dataset(source, composites, boot_quantiles):
    """
    Assemble the figure dataset for one data source.

    Variables are created in this order: ``wr_nums``; the mean field of each
    regime (``wr{n}_{source}``); then, per regime, the bootstrap quantile
    fields ``wr{n}_{source}_025``, ``_975``, ``_005`` and ``_995``.

    Args:
        source (str): Label inserted into the variable names ('era5' or 'cesm').
        composites: Four arrays with dims ('new', 'lat', 'lon'), one per regime.
        boot_quantiles: Four datasets holding an 'iteration' variable with a
            'quantile' coordinate containing 0.025, 0.975, 0.005 and 0.995.

    Returns:
        xr.Dataset on the lat/lon grid of the first composite.
    """
    data_vars = {
        "wr_nums": (["wr"], np.array([len(members.new) for members in composites])),
    }

    for wr_num, members in enumerate(composites, start=1):
        data_vars[f"wr{wr_num}_{source}"] = (
            ["lat", "lon"], members.mean('new', skipna=True).values)

    for wr_num, quantiles in enumerate(boot_quantiles, start=1):
        # evaluate the quantile field once rather than once per selected level
        iteration = quantiles['iteration'].compute()
        for level, suffix in ((0.025, "025"), (0.975, "975"), (0.005, "005"), (0.995, "995")):
            data_vars[f"wr{wr_num}_{source}_{suffix}"] = (
                ["lat", "lon"],
                iteration.sel(quantile=level).transpose('lat', 'lon').values,
            )

    return xr.Dataset(
        data_vars=data_vars,
        coords=dict(
            lon=(["lon"], composites[0].lon.values),
            lat=(["lat"], composites[0].lat.values),
            wr=(["wr"], np.array([1, 2, 3, 4])),
        ),
        attrs=dict(description="Figure data for weather regimes research."),
    )


ds_era5 = _build_wxregime_dataset(
    'era5',
    (z500_era5_tmp_01, z500_era5_tmp_02, z500_era5_tmp_03, z500_era5_tmp_04),
    (tmp_era5_wr1, tmp_era5_wr2, tmp_era5_wr3, tmp_era5_wr4),
)

ds_cesm = _build_wxregime_dataset(
    'cesm',
    (z500_cesm_tmp_01, z500_cesm_tmp_02, z500_cesm_tmp_03, z500_cesm_tmp_04),
    (tmp_cesm_wr1, tmp_cesm_wr2, tmp_cesm_wr3, tmp_cesm_wr4),
)

In [65]:
# individual member fields of every regime; each variable gets its own
# sample dimension, and all share the lat/lon taken from the first cesm regime
ds_samples = xr.Dataset(
    data_vars={
        var_name: ([sample_dim, "lat", "lon"], members.values)
        for var_name, sample_dim, members in (
            ("era5_wr1", "era5wr1", z500_era5_tmp_01),
            ("era5_wr2", "era5wr2", z500_era5_tmp_02),
            ("era5_wr3", "era5wr3", z500_era5_tmp_03),
            ("era5_wr4", "era5wr4", z500_era5_tmp_04),
            ("cesm_wr1", "cesmwr1", z500_cesm_tmp_01),
            ("cesm_wr2", "cesmwr2", z500_cesm_tmp_02),
            ("cesm_wr3", "cesmwr3", z500_cesm_tmp_03),
            ("cesm_wr4", "cesmwr4", z500_cesm_tmp_04),
        )
    },
    coords={
        "lon": (["lon"], z500_cesm_tmp_01.lon.values),
        "lat": (["lat"], z500_cesm_tmp_01.lat.values),
    },
    attrs={"description": "Table data for weather regimes research."},
)

In [60]:
# write the era5 figure dataset (ds_era5) to netCDF
ds_era5.to_netcdf('/glade/scratch/molina/s2s/bootstrap/era5_wxregimes.nc')

In [61]:
# write the cesm figure dataset (ds_cesm) to netCDF
ds_cesm.to_netcdf('/glade/scratch/molina/s2s/bootstrap/cesm_wxregimes.nc')

In [67]:
# write the per-member sample dataset (ds_samples) to netCDF
ds_samples.to_netcdf('/glade/scratch/molina/s2s/bootstrap/data_wxregimes.nc')