## Imports

In [1]:
import numpy as np
import pandas as pd
import xarray as xr

from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

import som_analysis
import cluster_analysis
import narm_analysis

## functions

In [2]:
def get_cold_indx(ds, mo_init=9, mo_end=2):
    """
    Extract indices for cold season.
    Grabbing Sept thru February init, for Oct thru March predictions.

    Args:
        ds: object whose ``ds['time']`` entry can be converted to a
            datetime index by ``pd.to_datetime``.
        mo_init (int): first month of the season (1-12). Defaults to 9.
        mo_end (int): last month of the season (1-12). Defaults to 2.

    Returns:
        Boolean numpy array with one entry per time step; True where the
        month of the time step lies inside the season.
    """
    months = np.asarray(pd.to_datetime(ds['time']).month)

    if mo_init <= mo_end:
        # season contained in one calendar year (e.g., 3 -> 5): both bounds
        # must hold, otherwise every month would be selected
        return (months >= mo_init) & (months <= mo_end)

    # season wraps around the new year (e.g., 9 -> 2)
    return (months >= mo_init) | (months <= mo_end)

## open and preprocess data

In [3]:
# bounds of the region used for clustering
lat0, lat1 = 10, 70
lon0, lon1 = -150, -40

# open era5 data, sliced to the clustering region
ds_era5 = narm_analysis.era5_z500(lat0=lat0, lat1=lat1, lon0=lon0, lon1=lon1)

# era5 anomalies
ds_era5_anom = narm_analysis.era5_climo_wrs(ds_era5, rolling_days=5, variable='clim')

# keep the time steps whose month is >= 10 or <= 3 (ONDJFM)
cold_season = get_cold_indx(ds_era5_anom, mo_init=10, mo_end=3)
ds_era5_anom = ds_era5_anom[cold_season, ...]

# restructure era5 array for machine learning training:
# collapse (lat, lon) into a single 'flat' axis and order the axes (time, flat)
era5_flat = ds_era5_anom.stack(flat=('lat', 'lon'))
ds_era5_train = era5_flat.transpose('time', 'flat').values

## pca and kmeans with era5

In [4]:
# create pca object (12 whitened components) and fit it with era5
pca_obj = PCA(12, whiten=True)
pca_obj = pca_obj.fit(ds_era5_train)

# transform era5 data with pca
# NOTE: this rebinds ds_era5_train to the pca scores, so the preprocessing
# cell must be re-run before this cell is executed a second time
ds_era5_train = pca_obj.transform(ds_era5_train)

# both arrays hold one entry per retained component, in percent
print(f'Variance explained: {pca_obj.explained_variance_ratio_ * 100}')
print(
f'Cumulative sum of variance explained by the leading EOFs: {np.cumsum(pca_obj.explained_variance_ratio_) * 100}'
)

In [5]:
# kmeans configuration (4 clusters, fixed random_state)
kmeans_settings = dict(
    n_clusters=4,
    init='k-means++',
    n_init=10000,
    max_iter=300,
    tol=0.0001,
    verbose=0,
    random_state=0,
)

# train kmeans on the pca-transformed era5 data
k_means = KMeans(**kmeans_settings)
k_means.fit(ds_era5_train)

print(f'inertia: {k_means.inertia_}')

## load data with lead time bias corrected anomalies

In [6]:
# arguments shared by the era5 and cesm loaders
shared_open_kwargs = dict(
    return_time=True,
    lat0=lat0, lat1=lat1, lon0=lon0, lon1=lon1,
    leadday0=0, leadday1=42, rolldays=5,
)

# era5 data
z500_era5, z500_era5_dt = som_analysis.open_era5_files(
    variable='z500', **shared_open_kwargs)

# cesm data
z500_cesm, z500_cesm_dt = som_analysis.open_cesm_files(
    variable='zg_500', **shared_open_kwargs)

# restructure arrays: (time, lead) -> 'new' and (lat, lon) -> 'flat'
stack_dims = dict(new=('time', 'lead'), flat=('lat', 'lon'))
z500_standard_era5 = z500_era5.stack(**stack_dims).transpose('new', 'flat')
z500_standard_cesm = z500_cesm.stack(**stack_dims).transpose('new', 'flat')

## extract cluster labels

(projecting era5 pca and kmeans onto cesm2)

In [7]:
def _label_regimes(z500_standard):
    """Run cluster_analysis.single_clusters with the fitted k_means and pca_obj."""
    return cluster_analysis.single_clusters(
        z500_standard, k_means, pca_obj, use_pca=True)


# the same era5-trained pca and kmeans objects are applied to both datasets
cluster_era5_evo = _label_regimes(z500_standard_era5)
cluster_cesm_evo = _label_regimes(z500_standard_cesm)

### what regime follows first regime? (ERA5)

In [8]:
thearray = cluster_era5_evo

# one bucket per starting regime: index 0 -> regime 1, ..., index 3 -> regime 4
era5_followers = [[], [], [], []]

# number of forecasts
for i in range(543):

    # regime at the first lead of this forecast
    wr_ = thearray[0, i, 0]

    # remaining leads (43 in total -- presumably lead days 0-42; TODO confirm)
    for j in range(1, 43):

        wr_now = thearray[0, i, j]
        if wr_ == wr_now:
            continue

        # first change of regime: file the new regime under the starting one
        for regime, bucket in enumerate(era5_followers):
            if wr_ == float(regime):
                bucket.append(wr_now)
        break

wr1_era5_next, wr2_era5_next, wr3_era5_next, wr4_era5_next = (
    np.array(bucket) for bucket in era5_followers)

# (heading, regimes that followed, labels of the other three regimes)
era5_report = (
    ('weather regime 1 to weather regimes 2, 3, and 4', wr1_era5_next, (1., 2., 3.)),
    ('weather regime 2 to weather regimes 1, 3, and 4', wr2_era5_next, (0., 2., 3.)),
    ('weather regime 3 to weather regimes 1, 2, and 4', wr3_era5_next, (0., 1., 3.)),
    ('weather regime 4 to weather regimes 1, 2, and 3', wr4_era5_next, (0., 1., 2.)),
)

# percent of first transitions that end in each of the other regimes
for heading, followers, targets in era5_report:
    print('')
    print(heading)
    for target in targets:
        print((followers[followers == target].shape[0] / followers.shape[0]) * 100)


weather regime 1 to weather regimes 2, 3, and 4
28.155339805825243
29.126213592233007
42.71844660194174

weather regime 2 to weather regimes 1, 3, and 4
25.196850393700785
40.15748031496063
34.645669291338585

weather regime 3 to weather regimes 1, 2, and 4
16.19718309859155
42.25352112676056
41.54929577464789

weather regime 4 to weather regimes 1, 2, and 3
34.705882352941174
38.82352941176471
26.47058823529412


### bootstrap two-tailed (ERA5, method 1: resample the first-transition samples)

In [15]:
# method 1: resample (with replacement) the regimes that followed each
# starting regime, i.e. the wr*_era5_next arrays

thearray = cluster_era5_evo

boot_iter = 10000

wr1to2_era5_boot, wr1to3_era5_boot, wr1to4_era5_boot = (np.zeros(boot_iter) for _ in range(3))
wr2to1_era5_boot, wr2to3_era5_boot, wr2to4_era5_boot = (np.zeros(boot_iter) for _ in range(3))
wr3to1_era5_boot, wr3to2_era5_boot, wr3to4_era5_boot = (np.zeros(boot_iter) for _ in range(3))
wr4to1_era5_boot, wr4to2_era5_boot, wr4to3_era5_boot = (np.zeros(boot_iter) for _ in range(3))

# (observed followers, ((following regime, array receiving the fraction), ...))
era5_resample_plan = (
    (wr1_era5_next, ((1., wr1to2_era5_boot), (2., wr1to3_era5_boot), (3., wr1to4_era5_boot))),
    (wr2_era5_next, ((0., wr2to1_era5_boot), (2., wr2to3_era5_boot), (3., wr2to4_era5_boot))),
    (wr3_era5_next, ((0., wr3to1_era5_boot), (1., wr3to2_era5_boot), (3., wr3to4_era5_boot))),
    (wr4_era5_next, ((0., wr4to1_era5_boot), (1., wr4to2_era5_boot), (2., wr4to3_era5_boot))),
)

for k in range(boot_iter):

    for observed, targets in era5_resample_plan:

        n_obs = observed.shape[0]

        # the generator is re-seeded with k + 1 before every starting regime
        np.random.seed(k + 1)
        rand_indx = [np.random.choice(n_obs) for _ in range(n_obs)]
        resampled = observed[rand_indx]

        # fraction of the resampled followers equal to each target regime
        for target, boot in targets:
            boot[k] = resampled[resampled == target].shape[0] / n_obs

In [16]:
# percentiles requested from every bootstrap distribution
boot_q = [0.5, 2.5, 97.5, 99.5]

# (heading, bootstrap arrays in the order they are listed in the heading)
era5_boot_report = (
    ('weather regime 1 to weather regimes 2, 3, and 4',
     (wr1to2_era5_boot, wr1to3_era5_boot, wr1to4_era5_boot)),
    ('weather regime 2 to weather regimes 1, 3, and 4',
     (wr2to1_era5_boot, wr2to3_era5_boot, wr2to4_era5_boot)),
    ('weather regime 3 to weather regimes 1, 2, and 4',
     (wr3to1_era5_boot, wr3to2_era5_boot, wr3to4_era5_boot)),
    ('weather regime 4 to weather regimes 1, 2, and 3',
     (wr4to1_era5_boot, wr4to2_era5_boot, wr4to3_era5_boot)),
)

for heading, boots in era5_boot_report:
    print('')
    print(heading)
    for boot in boots:
        print(np.nanpercentile(boot, q=boot_q))


weather regime 1 to weather regimes 2, 3, and 4
[0.18 0.2  0.38 0.41]
[0.15 0.18 0.35 0.38]
[0.32995 0.35    0.55    0.58   ]

weather regime 2 to weather regimes 1, 3, and 4
[0.1965812  0.22222222 0.39316239 0.41880342]
[0.24786325 0.27350427 0.44444444 0.47863248]
[0.22222222 0.24786325 0.41880342 0.44444444]

weather regime 3 to weather regimes 1, 2, and 4
[0.06569343 0.08029197 0.19708029 0.2189781 ]
[0.34306569 0.37226277 0.54014599 0.56934307]
[0.30656934 0.32846715 0.49635036 0.51824818]

weather regime 4 to weather regimes 1, 2, and 3
[0.21693122 0.23809524 0.37037037 0.39153439]
[0.33862434 0.35978836 0.4973545  0.52380952]
[0.19047619 0.20634921 0.33333333 0.35978836]


### bootstrap two-tailed (ERA5, method 2: resample the forecasts)

In [17]:
# method 2: resample whole forecasts (with replacement) and recompute the
# transition fractions from every resampled set of forecasts.
#
# This cell deliberately leaves wr1_era5_next ... wr4_era5_next untouched:
# they hold the observed (non-resampled) transitions that method 1 resamples.

thearray = cluster_era5_evo

boot_iter = 10000
n_forecasts = 543  # number of forecasts
n_leads = 43       # leads scanned per forecast

# The first regime change of a forecast is identical in every bootstrap
# iteration, so it is located once here instead of inside the bootstrap loop.
start_wr = np.full(n_forecasts, np.nan)         # regime at the first lead
follow_wr = np.full(n_forecasts, np.nan)        # regime at the first change
has_change = np.zeros(n_forecasts, dtype=bool)  # False if the regime never changes

for i in range(n_forecasts):
    wr_ = thearray[0, i, 0]
    start_wr[i] = float(wr_)
    for j in range(1, n_leads):
        if wr_ != thearray[0, i, j]:
            follow_wr[i] = float(thearray[0, i, j])
            has_change[i] = True
            break

wr1to2_era5_boot = np.zeros(boot_iter)
wr1to3_era5_boot = np.zeros(boot_iter)
wr1to4_era5_boot = np.zeros(boot_iter)

wr2to1_era5_boot = np.zeros(boot_iter)
wr2to3_era5_boot = np.zeros(boot_iter)
wr2to4_era5_boot = np.zeros(boot_iter)

wr3to1_era5_boot = np.zeros(boot_iter)
wr3to2_era5_boot = np.zeros(boot_iter)
wr3to4_era5_boot = np.zeros(boot_iter)

wr4to1_era5_boot = np.zeros(boot_iter)
wr4to2_era5_boot = np.zeros(boot_iter)
wr4to3_era5_boot = np.zeros(boot_iter)

# (starting regime, ((following regime, array receiving the fraction), ...))
era5_boot_targets = (
    (0., ((1., wr1to2_era5_boot), (2., wr1to3_era5_boot), (3., wr1to4_era5_boot))),
    (1., ((0., wr2to1_era5_boot), (2., wr2to3_era5_boot), (3., wr2to4_era5_boot))),
    (2., ((0., wr3to1_era5_boot), (1., wr3to2_era5_boot), (3., wr3to4_era5_boot))),
    (3., ((0., wr4to1_era5_boot), (1., wr4to2_era5_boot), (2., wr4to3_era5_boot))),
)

for k in range(boot_iter):

    # same seeding and draw sequence as before, so the resampled forecasts
    # of iteration k are unchanged
    np.random.seed(k + 1)
    rand_indx = [np.random.choice(n_forecasts) for ids in range(n_forecasts)]

    boot_start = start_wr[rand_indx]
    boot_follow = follow_wr[rand_indx]
    boot_change = has_change[rand_indx]

    for start, targets in era5_boot_targets:

        # regimes that followed `start` among the resampled forecasts
        followers = boot_follow[boot_change & (boot_start == start)]

        for follow, boot in targets:
            if followers.size:
                boot[k] = np.count_nonzero(followers == follow) / followers.size
            else:
                # no resampled forecast started in this regime: store NaN
                # (ignored by np.nanpercentile) instead of dividing by zero
                boot[k] = np.nan

In [18]:
# percentiles requested from every bootstrap distribution
boot_q = [0.5, 2.5, 97.5, 99.5]

# (heading, bootstrap arrays in the order they are listed in the heading)
era5_boot_report = (
    ('weather regime 1 to weather regimes 2, 3, and 4',
     (wr1to2_era5_boot, wr1to3_era5_boot, wr1to4_era5_boot)),
    ('weather regime 2 to weather regimes 1, 3, and 4',
     (wr2to1_era5_boot, wr2to3_era5_boot, wr2to4_era5_boot)),
    ('weather regime 3 to weather regimes 1, 2, and 4',
     (wr3to1_era5_boot, wr3to2_era5_boot, wr3to4_era5_boot)),
    ('weather regime 4 to weather regimes 1, 2, and 3',
     (wr4to1_era5_boot, wr4to2_era5_boot, wr4to3_era5_boot)),
)

for heading, boots in era5_boot_report:
    print('')
    print(heading)
    for boot in boots:
        print(np.nanpercentile(boot, q=boot_q))


weather regime 1 to weather regimes 2, 3, and 4
[0.16842053 0.1962571  0.37168142 0.4       ]
[0.18032623 0.20689655 0.3814433  0.40909091]
[0.30107475 0.33333333 0.5212766  0.55339806]

weather regime 2 to weather regimes 1, 3, and 4
[0.15909091 0.1796875  0.33058018 0.35593515]
[0.29032135 0.31666667 0.488      0.51493011]
[0.24264476 0.26446281 0.43065693 0.45689778]

weather regime 3 to weather regimes 1, 2, and 4
[0.08888799 0.10416667 0.22368421 0.2481203 ]
[0.31944444 0.34306569 0.50685169 0.53237457]
[0.31080714 0.33571429 0.4964539  0.52054851]

weather regime 4 to weather regimes 1, 2, and 3
[0.25308569 0.27607362 0.42011834 0.44099379]
[0.29341163 0.31578947 0.46198923 0.48587591]
[0.18055556 0.2        0.33146145 0.35326103]


### what regime follows first regime? (CESM2)

In [19]:
# what regime follows first regime?

thearray = cluster_cesm_evo

# one bucket per starting regime: index 0 -> regime 1, ..., index 3 -> regime 4
cesm_followers = [[], [], [], []]

# number of forecasts
for i in range(543):

    # regime at the first lead of this forecast
    wr_ = thearray[0, i, 0]

    # remaining leads (43 in total -- presumably lead days 0-42; TODO confirm)
    for j in range(1, 43):

        wr_now = thearray[0, i, j]
        if wr_ == wr_now:
            continue

        # first change of regime: file the new regime under the starting one
        for regime, bucket in enumerate(cesm_followers):
            if wr_ == float(regime):
                bucket.append(wr_now)
        break

wr1_cesm_next, wr2_cesm_next, wr3_cesm_next, wr4_cesm_next = (
    np.array(bucket) for bucket in cesm_followers)

# (heading, regimes that followed, labels of the other three regimes)
cesm_report = (
    ('weather regime 1 to weather regimes 2, 3, and 4', wr1_cesm_next, (1., 2., 3.)),
    ('weather regime 2 to weather regimes 1, 3, and 4', wr2_cesm_next, (0., 2., 3.)),
    ('weather regime 3 to weather regimes 1, 2, and 4', wr3_cesm_next, (0., 1., 3.)),
    ('weather regime 4 to weather regimes 1, 2, and 3', wr4_cesm_next, (0., 1., 2.)),
)

# percent of first transitions that end in each of the other regimes
for heading, followers, targets in cesm_report:
    print('')
    print(heading)
    for target in targets:
        print((followers[followers == target].shape[0] / followers.shape[0]) * 100)


weather regime 1 to weather regimes 2, 3, and 4
21.0
33.0
46.0

weather regime 2 to weather regimes 1, 3, and 4
21.804511278195488
39.849624060150376
38.34586466165413

weather regime 3 to weather regimes 1, 2, and 4
19.014084507042252
31.690140845070424
49.29577464788733

weather regime 4 to weather regimes 1, 2, and 3
37.34939759036144
33.13253012048193
29.518072289156628


### bootstrap two-tailed (CESM2, method 1: resample the first-transition samples)

In [20]:
# method 1: resample (with replacement) the regimes that followed each
# starting regime, i.e. the wr*_cesm_next arrays

thearray = cluster_cesm_evo

boot_iter = 10000

wr1to2_cesm_boot, wr1to3_cesm_boot, wr1to4_cesm_boot = (np.zeros(boot_iter) for _ in range(3))
wr2to1_cesm_boot, wr2to3_cesm_boot, wr2to4_cesm_boot = (np.zeros(boot_iter) for _ in range(3))
wr3to1_cesm_boot, wr3to2_cesm_boot, wr3to4_cesm_boot = (np.zeros(boot_iter) for _ in range(3))
wr4to1_cesm_boot, wr4to2_cesm_boot, wr4to3_cesm_boot = (np.zeros(boot_iter) for _ in range(3))

# (observed followers, ((following regime, array receiving the fraction), ...))
cesm_resample_plan = (
    (wr1_cesm_next, ((1., wr1to2_cesm_boot), (2., wr1to3_cesm_boot), (3., wr1to4_cesm_boot))),
    (wr2_cesm_next, ((0., wr2to1_cesm_boot), (2., wr2to3_cesm_boot), (3., wr2to4_cesm_boot))),
    (wr3_cesm_next, ((0., wr3to1_cesm_boot), (1., wr3to2_cesm_boot), (3., wr3to4_cesm_boot))),
    (wr4_cesm_next, ((0., wr4to1_cesm_boot), (1., wr4to2_cesm_boot), (2., wr4to3_cesm_boot))),
)

for k in range(boot_iter):

    for observed, targets in cesm_resample_plan:

        n_obs = observed.shape[0]

        # the generator is re-seeded with k + 1 before every starting regime
        np.random.seed(k + 1)
        rand_indx = [np.random.choice(n_obs) for _ in range(n_obs)]
        resampled = observed[rand_indx]

        # fraction of the resampled followers equal to each target regime
        for target, boot in targets:
            boot[k] = resampled[resampled == target].shape[0] / n_obs

In [21]:
# percentiles requested from every bootstrap distribution
boot_q = [0.5, 2.5, 97.5, 99.5]

# (heading, bootstrap arrays in the order they are listed in the heading)
cesm_boot_report = (
    ('weather regime 1 to weather regimes 2, 3, and 4',
     (wr1to2_cesm_boot, wr1to3_cesm_boot, wr1to4_cesm_boot)),
    ('weather regime 2 to weather regimes 1, 3, and 4',
     (wr2to1_cesm_boot, wr2to3_cesm_boot, wr2to4_cesm_boot)),
    ('weather regime 3 to weather regimes 1, 2, and 4',
     (wr3to1_cesm_boot, wr3to2_cesm_boot, wr3to4_cesm_boot)),
    ('weather regime 4 to weather regimes 1, 2, and 3',
     (wr4to1_cesm_boot, wr4to2_cesm_boot, wr4to3_cesm_boot)),
)

for heading, boots in cesm_boot_report:
    print('')
    print(heading)
    for boot in boots:
        print(np.nanpercentile(boot, q=boot_q))


weather regime 1 to weather regimes 2, 3, and 4
[0.11 0.13 0.29 0.32]
[0.21 0.24 0.42 0.45]
[0.34 0.36 0.56 0.59]

weather regime 2 to weather regimes 1, 3, and 4
[0.12781955 0.15037594 0.29323308 0.31578947]
[0.29323308 0.31578947 0.48120301 0.5037594 ]
[0.27819549 0.30075188 0.46616541 0.4962406 ]

weather regime 3 to weather regimes 1, 2, and 4
[0.11267606 0.12676056 0.25352113 0.27464789]
[0.21830986 0.23943662 0.3943662  0.41549296]
[0.38732394 0.4084507  0.57746479 0.6056338 ]

weather regime 4 to weather regimes 1, 2, and 3
[0.27710843 0.30120482 0.44578313 0.46987952]
[0.24096386 0.26506024 0.40361446 0.42771084]
[0.20481928 0.22891566 0.36159639 0.39156627]


### bootstrap two-tailed (CESM2, method 2: resample the forecasts)

In [22]:
# method 2: resample whole forecasts (with replacement) and recompute the
# transition fractions from every resampled set of forecasts.
#
# This cell deliberately leaves wr1_cesm_next ... wr4_cesm_next untouched:
# they hold the observed (non-resampled) transitions that method 1 resamples.

thearray = cluster_cesm_evo

boot_iter = 10000
n_forecasts = 543  # number of forecasts
n_leads = 43       # leads scanned per forecast

# The first regime change of a forecast is identical in every bootstrap
# iteration, so it is located once here instead of inside the bootstrap loop.
start_wr = np.full(n_forecasts, np.nan)         # regime at the first lead
follow_wr = np.full(n_forecasts, np.nan)        # regime at the first change
has_change = np.zeros(n_forecasts, dtype=bool)  # False if the regime never changes

for i in range(n_forecasts):
    wr_ = thearray[0, i, 0]
    start_wr[i] = float(wr_)
    for j in range(1, n_leads):
        if wr_ != thearray[0, i, j]:
            follow_wr[i] = float(thearray[0, i, j])
            has_change[i] = True
            break

wr1to2_cesm_boot = np.zeros(boot_iter)
wr1to3_cesm_boot = np.zeros(boot_iter)
wr1to4_cesm_boot = np.zeros(boot_iter)

wr2to1_cesm_boot = np.zeros(boot_iter)
wr2to3_cesm_boot = np.zeros(boot_iter)
wr2to4_cesm_boot = np.zeros(boot_iter)

wr3to1_cesm_boot = np.zeros(boot_iter)
wr3to2_cesm_boot = np.zeros(boot_iter)
wr3to4_cesm_boot = np.zeros(boot_iter)

wr4to1_cesm_boot = np.zeros(boot_iter)
wr4to2_cesm_boot = np.zeros(boot_iter)
wr4to3_cesm_boot = np.zeros(boot_iter)

# (starting regime, ((following regime, array receiving the fraction), ...))
cesm_boot_targets = (
    (0., ((1., wr1to2_cesm_boot), (2., wr1to3_cesm_boot), (3., wr1to4_cesm_boot))),
    (1., ((0., wr2to1_cesm_boot), (2., wr2to3_cesm_boot), (3., wr2to4_cesm_boot))),
    (2., ((0., wr3to1_cesm_boot), (1., wr3to2_cesm_boot), (3., wr3to4_cesm_boot))),
    (3., ((0., wr4to1_cesm_boot), (1., wr4to2_cesm_boot), (2., wr4to3_cesm_boot))),
)

for k in range(boot_iter):

    # same seeding and draw sequence as before, so the resampled forecasts
    # of iteration k are unchanged
    np.random.seed(k + 1)
    rand_indx = [np.random.choice(n_forecasts) for ids in range(n_forecasts)]

    boot_start = start_wr[rand_indx]
    boot_follow = follow_wr[rand_indx]
    boot_change = has_change[rand_indx]

    for start, targets in cesm_boot_targets:

        # regimes that followed `start` among the resampled forecasts
        followers = boot_follow[boot_change & (boot_start == start)]

        for follow, boot in targets:
            if followers.size:
                boot[k] = np.count_nonzero(followers == follow) / followers.size
            else:
                # no resampled forecast started in this regime: store NaN
                # (ignored by np.nanpercentile) instead of dividing by zero
                boot[k] = np.nan

In [24]:
# percentiles requested from every bootstrap distribution
boot_q = [0.5, 2.5, 97.5, 99.5]

# (heading, bootstrap arrays in the order they are listed in the heading)
cesm_boot_report = (
    ('weather regime 1 to weather regimes 2, 3, and 4',
     (wr1to2_cesm_boot, wr1to3_cesm_boot, wr1to4_cesm_boot)),
    ('weather regime 2 to weather regimes 1, 3, and 4',
     (wr2to1_cesm_boot, wr2to3_cesm_boot, wr2to4_cesm_boot)),
    ('weather regime 3 to weather regimes 1, 2, and 4',
     (wr3to1_cesm_boot, wr3to2_cesm_boot, wr3to4_cesm_boot)),
    ('weather regime 4 to weather regimes 1, 2, and 3',
     (wr4to1_cesm_boot, wr4to2_cesm_boot, wr4to3_cesm_boot)),
)

for heading, boots in cesm_boot_report:
    print('')
    print(heading)
    for boot in boots:
        print(np.nanpercentile(boot, q=boot_q))


weather regime 1 to weather regimes 2, 3, and 4
[0.10784156 0.13186813 0.29245283 0.32075472]
[0.2162104  0.23958333 0.42201835 0.45744681]
[0.33333333 0.36363636 0.55882353 0.59139785]

weather regime 2 to weather regimes 1, 3, and 4
[0.13043478 0.1496063  0.29134678 0.31297938]
[0.28888889 0.31538462 0.48360656 0.51127906]
[0.27692058 0.30147059 0.46808933 0.49334373]

weather regime 3 to weather regimes 1, 2, and 4
[0.10869481 0.12751678 0.25714811 0.28000127]
[0.21854305 0.24113475 0.39437099 0.42384106]
[0.38271423 0.41044298 0.57516788 0.60000699]

weather regime 4 to weather regimes 1, 2, and 3
[0.27499927 0.30069816 0.44970492 0.47402764]
[0.23999898 0.25954078 0.40340909 0.42483742]
[0.20381562 0.2261864  0.36477987 0.38829906]
