In [239]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [240]:
import numpy as np
import scipy as sp
import sklearn as sk
import cartopy as cp
import xarray as xr
import pandas as pd
import cftime
import dateutil
import dask
#Use the 2 lines below if the notebook has a dark theme (to make labelling visible):
from jupyterthemes import jtplot
jtplot.style(theme='chesterish', context='notebook', ticks=True, grid=False)
import matplotlib.pyplot as plt
#The following code resets the default plot size so you don't have to fiddle with figsize every time"
fig_size = plt.rcParams["figure.figsize"]
fig_size[0] = 16
fig_size[1] = 8
plt.rcParams["figure.figsize"] = fig_size

In [241]:
#Need the following line to avoid hdf5 issues that prevent opening thee file
# https://stackoverflow.com/questions/49317927/errno-101-netcdf-hdf-error-when-opening-netcdf-file
%env HDF5_USE_FILE_LOCKING=FALSE

env: HDF5_USE_FILE_LOCKING=FALSE


In [242]:
from netCDF4 import Dataset
from datetime import datetime, timedelta
from netCDF4 import num2date, date2num

In [243]:
def covariance(x, y, dims=None):
    return xr.dot(x - x.mean(dims), y - y.mean(dims), dims=dims) / x.count(dims)

def correlation(x, y, dims=None):
    return covariance(x, y, dims) / (x.std(dims) * y.std(dims))

# LOAD DATA

## AIRI Data

In [244]:
dataset_file = "pALLIN.nc"
airi_dat = xr.open_dataset(dataset_file, decode_times=False)
units, reference_date = airi_dat.time.attrs['units'].split('since')
airi_dat['time'] = pd.date_range(start=reference_date, periods=airi_dat.sizes['time'], freq='MS')
#This file's calendar isn't recognized when using xr.open_dataset. The above workaround is from: 
#https://stackoverflow.com/questions/55648630/how-to-decode-the-time-variable-while-using-xarray-to-load-a-netcdf-file
# "M" means "month end frequency" (see https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases)
precip = airi_dat['precip']

In [245]:
#Get precip anomalies
precip_clim = precip.groupby("time.month").mean("time")
precip_anomfull = precip.groupby("time.month") - precip_clim

In [247]:
def is_jjas(month):
    return (month >= 6) & (month <= 9)
precip = precip_anomfull.sel(time=is_jjas(precip_anomfull['time.month'])).groupby('time.year').sum(dim='time')

monthtime = pd.date_range('1871-01-01', freq='Y', periods=146)
precip['year'] = monthtime
precip = precip.rename({'year': 'time'})

In [248]:
precip_subset1 = precip.sel(time=slice('1900-01-01','1901-12-31'))
precip_subset2 = precip.sel(time=slice('1903-01-01','1906-12-31'))
precip_subset3 = precip.sel(time=slice('1908-01-01','1911-12-31'))
precip_subset4 = precip.sel(time=slice('1913-01-01','1913-12-31'))
precip_subset5 = precip.sel(time=slice('1915-01-01','1918-12-31'))
precip_subset6 = precip.sel(time=slice('1920-01-01','1921-12-31'))
precip_subset7 = precip.sel(time=slice('1923-01-01','1927-12-31'))
precip_subset8 = precip.sel(time=slice('1929-01-01','1931-12-31'))
precip_subset9 = precip.sel(time=slice('1934-01-01','1939-12-31'))
precip_subset10 = precip.sel(time=slice('1941-01-01','1948-12-31'))
precip_subset11 = precip.sel(time=slice('1951-01-01','1955-12-31'))
precip_subset12 = precip.sel(time=slice('1957-01-01','1958-12-31'))
precip_subset13 = precip.sel(time=slice('1960-01-01','1962-12-31'))
precip_subset13 = precip.sel(time=slice('1964-01-01','1965-12-31'))
precip_subset14 = precip.sel(time=slice('1967-01-01','1971-12-31'))
precip_subset15 = precip.sel(time=slice('1973-01-01','1976-12-31'))
precip_subset16 = precip.sel(time=slice('1978-01-01','1979-12-31'))
precip_subset17 = precip.sel(time=slice('1981-01-01','1982-12-31'))
precip_subset18 = precip.sel(time=slice('1984-01-01','1991-12-31'))
precip_subset19 = precip.sel(time=slice('1993-01-01','1998-12-31'))
precip_subset20 = precip.sel(time=slice('2000-01-01','2002-12-31'))
precip_subset21 = precip.sel(time=slice('2004-01-01','2006-12-31'))
precip_subset22 = precip.sel(time=slice('2008-01-01','2010-12-31'))
precip_subset23 = precip.sel(time=slice('2012-01-01','2015-12-31'))

precip_anom = xr.merge([precip_subset1, precip_subset2, precip_subset3, precip_subset4, precip_subset5, 
                   precip_subset6, precip_subset7, precip_subset8, precip_subset9, precip_subset10, 
                   precip_subset11, precip_subset12, precip_subset13, precip_subset14, precip_subset15, 
                   precip_subset16, precip_subset17, precip_subset18, precip_subset19, precip_subset20, 
                   precip_subset21, precip_subset22, precip_subset23])
precip_anomtemp = xr.Dataset.to_array(precip_anom)
precipa_jjas = precip_anom['precip']

## SST Data

In [249]:
dataset_url = "https://psl.noaa.gov/thredds/dodsC/Datasets/noaa.ersst.v5/sst.mnmean.nc"
sst_dat = xr.open_dataset(dataset_url)
sst = sst_dat['sst']

In [250]:
#Slice SST data to the same duration as precip and calculate monthly anomalies
sst_subset = sst.sel(time=slice('1871-01-30','2017-01-01'))
sst_clim = sst_subset.groupby('time.month').mean('time')
sst_anomfull = sst_subset.groupby("time.month")-sst_clim

  return np.nanmean(a, axis=axis, dtype=dtype)


In [251]:
sst_subset1 = sst_anomfull.sel(time=slice('1900-01-01','1901-12-31'))
sst_subset2 = sst_anomfull.sel(time=slice('1903-01-01','1906-12-31'))
sst_subset3 = sst_anomfull.sel(time=slice('1908-01-01','1911-12-31'))
sst_subset4 = sst_anomfull.sel(time=slice('1913-01-01','1913-12-31'))
sst_subset5 = sst_anomfull.sel(time=slice('1915-01-01','1918-12-31'))
sst_subset6 = sst_anomfull.sel(time=slice('1920-01-01','1921-12-31'))
sst_subset7 = sst_anomfull.sel(time=slice('1923-01-01','1927-12-31'))
sst_subset8 = sst_anomfull.sel(time=slice('1929-01-01','1931-12-31'))
sst_subset9 = sst_anomfull.sel(time=slice('1934-01-01','1939-12-31'))
sst_subset10 = sst_anomfull.sel(time=slice('1941-01-01','1948-12-31'))
sst_subset11 = sst_anomfull.sel(time=slice('1951-01-01','1955-12-31'))
sst_subset12 = sst_anomfull.sel(time=slice('1957-01-01','1958-12-31'))
sst_subset13 = sst_anomfull.sel(time=slice('1960-01-01','1962-12-31'))
sst_subset13 = sst_anomfull.sel(time=slice('1964-01-01','1965-12-31'))
sst_subset14 = sst_anomfull.sel(time=slice('1967-01-01','1971-12-31'))
sst_subset15 = sst_anomfull.sel(time=slice('1973-01-01','1976-12-31'))
sst_subset16 = sst_anomfull.sel(time=slice('1978-01-01','1979-12-31'))
sst_subset17 = sst_anomfull.sel(time=slice('1981-01-01','1982-12-31'))
sst_subset18 = sst_anomfull.sel(time=slice('1984-01-01','1991-12-31'))
sst_subset19 = sst_anomfull.sel(time=slice('1993-01-01','1998-12-31'))
sst_subset20 = sst_anomfull.sel(time=slice('2000-01-01','2002-12-31'))
sst_subset21 = sst_anomfull.sel(time=slice('2004-01-01','2006-12-31'))
sst_subset22 = sst_anomfull.sel(time=slice('2008-01-01','2010-12-31'))
sst_subset23 = sst_anomfull.sel(time=slice('2012-01-01','2015-12-31'))

sst_anomtemp = xr.merge([sst_subset1, sst_subset2, sst_subset3, sst_subset4, sst_subset5, 
                   sst_subset6, sst_subset7, sst_subset8, sst_subset9, sst_subset10, 
                   sst_subset11, sst_subset12, sst_subset13, sst_subset14, sst_subset15, 
                   sst_subset16, sst_subset17, sst_subset18, sst_subset19, sst_subset20, 
                   sst_subset21, sst_subset22, sst_subset23])
sst_anom = sst_anomtemp['sst']

## SLP Data

In [252]:
dataset_url = "/global/scratch/medhap02/ISMData/prmsl.mon.mean.nc"
slp_dat = xr.open_dataset(dataset_url)
slp = slp_dat['prmsl']

In [253]:
#Slice SLP data to the same duration as precip and calculate monthly anomalies
slp_subset = slp.sel(time=slice('1871-01-01','2015-12-31'))
slp_clim = slp_subset.groupby('time.month').mean('time')
slp_anomfull = slp_subset.groupby("time.month")-slp_clim

In [254]:
slp_subset1 = slp_anomfull.sel(time=slice('1900-01-01','1901-12-31'))
slp_subset2 = slp_anomfull.sel(time=slice('1903-01-01','1906-12-31'))
slp_subset3 = slp_anomfull.sel(time=slice('1908-01-01','1911-12-31'))
slp_subset4 = slp_anomfull.sel(time=slice('1913-01-01','1913-12-31'))
slp_subset5 = slp_anomfull.sel(time=slice('1915-01-01','1918-12-31'))
slp_subset6 = slp_anomfull.sel(time=slice('1920-01-01','1921-12-31'))
slp_subset7 = slp_anomfull.sel(time=slice('1923-01-01','1927-12-31'))
slp_subset8 = slp_anomfull.sel(time=slice('1929-01-01','1931-12-31'))
slp_subset9 = slp_anomfull.sel(time=slice('1934-01-01','1939-12-31'))
slp_subset10 = slp_anomfull.sel(time=slice('1941-01-01','1948-12-31'))
slp_subset11 = slp_anomfull.sel(time=slice('1951-01-01','1955-12-31'))
slp_subset12 = slp_anomfull.sel(time=slice('1957-01-01','1958-12-31'))
slp_subset13 = slp_anomfull.sel(time=slice('1960-01-01','1962-12-31'))
slp_subset13 = slp_anomfull.sel(time=slice('1964-01-01','1965-12-31'))
slp_subset14 = slp_anomfull.sel(time=slice('1967-01-01','1971-12-31'))
slp_subset15 = slp_anomfull.sel(time=slice('1973-01-01','1976-12-31'))
slp_subset16 = slp_anomfull.sel(time=slice('1978-01-01','1979-12-31'))
slp_subset17 = slp_anomfull.sel(time=slice('1981-01-01','1982-12-31'))
slp_subset18 = slp_anomfull.sel(time=slice('1984-01-01','1991-12-31'))
slp_subset19 = slp_anomfull.sel(time=slice('1993-01-01','1998-12-31'))
slp_subset20 = slp_anomfull.sel(time=slice('2000-01-01','2002-12-31'))
slp_subset21 = slp_anomfull.sel(time=slice('2004-01-01','2006-12-31'))
slp_subset22 = slp_anomfull.sel(time=slice('2008-01-01','2010-12-31'))
slp_subset23 = slp_anomfull.sel(time=slice('2012-01-01','2015-12-31'))

slp_anomtemp = xr.merge([slp_subset1, slp_subset2, slp_subset3, slp_subset4, slp_subset5, 
                   slp_subset6, slp_subset7, slp_subset8, slp_subset9, slp_subset10, 
                   slp_subset11, slp_subset12, slp_subset13, slp_subset14, slp_subset15, 
                   slp_subset16, slp_subset17, slp_subset18, slp_subset19, slp_subset20, 
                   slp_subset21, slp_subset22, slp_subset23])
slp_anom = slp_anomtemp['prmsl']

## MSE Data

In [255]:
dataset_url = "MSE_20thcentury.nc"
mse_dat1 = xr.open_dataset('/global/scratch/medhap02/ISMData/MSE_20thcentury.nc')
mse_dat = mse_dat1.rename({'LON':'lon', 'LAT':'lat','TIME':'time'})
mse = mse_dat['MSE']

In [256]:
#Slice MSE data to the same duration as precip and calculate monthly anomalies
mse_subset = mse.sel(time=slice('1871-01-30','2017-01-01'))
mse_clim = mse_subset.groupby('time.month').mean('time')
mse_anomfull = mse_subset.groupby("time.month")-mse_clim

In [257]:
mse_subset1 = mse_anomfull.sel(time=slice('1900-01-01','1901-12-31'))
mse_subset2 = mse_anomfull.sel(time=slice('1903-01-01','1906-12-31'))
mse_subset3 = mse_anomfull.sel(time=slice('1908-01-01','1911-12-31'))
mse_subset4 = mse_anomfull.sel(time=slice('1913-01-01','1913-12-31'))
mse_subset5 = mse_anomfull.sel(time=slice('1915-01-01','1918-12-31'))
mse_subset6 = mse_anomfull.sel(time=slice('1920-01-01','1921-12-31'))
mse_subset7 = mse_anomfull.sel(time=slice('1923-01-01','1927-12-31'))
mse_subset8 = mse_anomfull.sel(time=slice('1929-01-01','1931-12-31'))
mse_subset9 = mse_anomfull.sel(time=slice('1934-01-01','1939-12-31'))
mse_subset10 = mse_anomfull.sel(time=slice('1941-01-01','1948-12-31'))
mse_subset11 = mse_anomfull.sel(time=slice('1951-01-01','1955-12-31'))
mse_subset12 = mse_anomfull.sel(time=slice('1957-01-01','1958-12-31'))
mse_subset13 = mse_anomfull.sel(time=slice('1960-01-01','1962-12-31'))
mse_subset13 = mse_anomfull.sel(time=slice('1964-01-01','1965-12-31'))
mse_subset14 = mse_anomfull.sel(time=slice('1967-01-01','1971-12-31'))
mse_subset15 = mse_anomfull.sel(time=slice('1973-01-01','1976-12-31'))
mse_subset16 = mse_anomfull.sel(time=slice('1978-01-01','1979-12-31'))
mse_subset17 = mse_anomfull.sel(time=slice('1981-01-01','1982-12-31'))
mse_subset18 = mse_anomfull.sel(time=slice('1984-01-01','1991-12-31'))
mse_subset19 = mse_anomfull.sel(time=slice('1993-01-01','1998-12-31'))
mse_subset20 = mse_anomfull.sel(time=slice('2000-01-01','2002-12-31'))
mse_subset21 = mse_anomfull.sel(time=slice('2004-01-01','2006-12-31'))
mse_subset22 = mse_anomfull.sel(time=slice('2008-01-01','2010-12-31'))
mse_subset23 = mse_anomfull.sel(time=slice('2012-01-01','2015-12-31'))

mse_anomtemp = xr.merge([mse_subset1, mse_subset2, mse_subset3, mse_subset4, mse_subset5, 
                   mse_subset6, mse_subset7, mse_subset8, mse_subset9, mse_subset10, 
                   mse_subset11, mse_subset12, mse_subset13, mse_subset14, mse_subset15, 
                   mse_subset16, mse_subset17, mse_subset18, mse_subset19, mse_subset20, 
                   mse_subset21, mse_subset22, mse_subset23])
mse_anom = mse_anomtemp['MSE']

# Correlation Comparisons

## EPT

In [258]:
#EPT original

#DSST1
latrange1 = sst_anom.sel(lat = slice(5, -20)).lat
weights1 = np.cos(latrange1*np.pi/180)

marchdata1 = sst_anom.sel(lat = slice(5, -20), lon = slice(150, 170), time = (sst_anom['time.month']==3))
marchSST1 = marchdata1.sel(time = slice('1900-03-01', '2015-03-01'))
marchmean1 = (marchSST1*weights1).mean({'lon', 'lat'})/weights1.mean() 

aprildata1 = sst_anom.sel(lat = slice(5, -20), lon = slice(150, 170), time = (sst_anom['time.month']==4))
aprilSST1 = aprildata1.sel(time = slice('1900-04-01', '2015-04-01'))
aprilmean1 = (aprilSST1*weights1).mean({'lon', 'lat'})/weights1.mean() 

maydata1 = sst_anom.sel(lat = slice(5, -20), lon = slice(150, 170), time = (sst_anom['time.month']==5))
maySST1 = maydata1.sel(time = slice('1900-05-01', '2015-05-01'))
maymean1 = (maySST1*weights1).mean({'lon', 'lat'})/weights1.mean()

marchmean1.time.data = maymean1.time.data
aprilmean1.time.data = maymean1.time.data
DSST1 = (maymean1 - marchmean1)

#DSST2
latrange2 = sst_anom.sel(lat = slice(10, -10)).lat
weights2 = np.cos(latrange2*np.pi/180)

marchdata2 = sst_anom.sel(lat = slice(10, -10), lon = slice(250, 280), time = (sst_anom['time.month']==3))
marchSST2 = marchdata2.sel(time = slice('1900-03-01', '2015-03-01'))
marchmean2 = (marchSST2*weights2).mean({'lon', 'lat'})/weights2.mean()

aprildata2 = sst_anom.sel(lat = slice(10, -10), lon = slice(250, 280), time = (sst_anom['time.month']==4))
aprilSST2 = aprildata2.sel(time = slice('1900-04-01', '2015-04-01'))
aprilmean2 = (aprilSST2*weights2).mean({'lon', 'lat'})/weights2.mean() 

maydata2 = sst_anom.sel(lat = slice(10, -10), lon = slice(250, 280), time = (sst_anom['time.month']==5))
maySST2 = maydata2.sel(time = slice('1900-05-01', '2015-05-01'))
maymean2 = (maySST2*weights2).mean({'lon', 'lat'})/weights2.mean()

marchmean2.time.data = maymean2.time.data
aprilmean2.time.data = maymean2.time.data
DSST2 = (maymean2 - marchmean2)

EPToriginal = DSST1 - DSST2
EPTstd = EPToriginal.std()
EPTmean = EPToriginal.mean()
EPToriginal = (EPToriginal - EPTmean)/EPTstd

In [259]:
#EPT centered on equator
#(30n-30s, 155-175e)
#(10n-10s, 115-80w)

#DSST1
latrange1 = sst_anom.sel(lat = slice(30, -30)).lat
weights1 = np.cos(latrange1*np.pi/180)

marchdata1 = sst_anom.sel(lat = slice(30, -30), lon = slice(155, 175), time = (sst_anom['time.month']==3))
marchSST1 = marchdata1.sel(time = slice('1900-03-01', '2015-03-01'))
marchmean1 = (marchSST1*weights1).mean({'lon', 'lat'})/weights1.mean() 

aprildata1 = sst_anom.sel(lat = slice(30, -30), lon = slice(155, 175), time = (sst_anom['time.month']==4))
aprilSST1 = aprildata1.sel(time = slice('1900-04-01', '2015-04-01'))
aprilmean1 = (aprilSST1*weights1).mean({'lon', 'lat'})/weights1.mean() 

maydata1 = sst_anom.sel(lat = slice(30, -30), lon = slice(155, 175), time = (sst_anom['time.month']==5))
maySST1 = maydata1.sel(time = slice('1900-05-01', '2015-05-01'))
maymean1 = (maySST1*weights1).mean({'lon', 'lat'})/weights1.mean()

marchmean1.time.data = maymean1.time.data
aprilmean1.time.data = maymean1.time.data
DSST1maymarcentered = maymean1-marchmean1

#DSST2
latrange2 = sst_anom.sel(lat = slice(10, -10)).lat
weights2 = np.cos(latrange2*np.pi/180)

marchdata2 = sst_anom.sel(lat = slice(10, -10), lon = slice(245, 280), time = (sst_anom['time.month']==3))
marchSST2 = marchdata2.sel(time = slice('1900-03-01', '2015-03-01'))
marchmean2 = (marchSST2*weights2).mean({'lon', 'lat'})/weights2.mean()

aprildata2 = sst_anom.sel(lat = slice(10, -10), lon = slice(245, 280), time = (sst_anom['time.month']==4))
aprilSST2 = aprildata2.sel(time = slice('1900-04-01', '2015-04-01'))
aprilmean2 = (aprilSST2*weights2).mean({'lon', 'lat'})/weights2.mean() 

maydata2 = sst_anom.sel(lat = slice(10, -10), lon = slice(245, 280), time = (sst_anom['time.month']==5))
maySST2 = maydata2.sel(time = slice('1900-05-01', '2015-05-01'))
maymean2 = (maySST2*weights2).mean({'lon', 'lat'})/weights2.mean()

marchmean2.time.data = maymean2.time.data
aprilmean2.time.data = maymean2.time.data
DSST2maymarcentered = maymean2-marchmean2

EPTmaymarcentered = DSST1maymarcentered - DSST2maymarcentered
EPTstd = EPTmaymarcentered.std()
EPTmean = EPTmaymarcentered.mean()
EPTmaymarcentered = (EPTmaymarcentered - EPTmean)/EPTstd

In [260]:
precipa_jjas.time.data = EPToriginal.time.data

eptoriginal_subtraction = correlation(EPToriginal, precipa_jjas, dims='time')
print("EPT Original", eptoriginal_subtraction.data)
ept_australia = correlation(DSST1, precipa_jjas, dims='time')
print("DSST1(australia)", ept_australia.data)
ept_pacific = correlation(DSST2, precipa_jjas, dims='time')
print("DSST2(pacific)", ept_pacific.data)

print()
eptmaymarcentered_subtraction = correlation(EPTmaymarcentered, precipa_jjas, dims='time')
print("EPT May minus March expanded and centered", eptmaymarcentered_subtraction.data)
ept_australia = correlation(DSST1maymarcentered, precipa_jjas, dims='time')
print("DSST1(australia) may-march centered", ept_australia.data)
ept_pacific = correlation(DSST2maymarcentered, precipa_jjas, dims='time')
print("DSST2(pacific) may-march expanded", ept_pacific.data)

EPT Original 0.4090730265875154
DSST1(australia) 0.12665882952604268
DSST2(pacific) -0.4011344140241192

EPT May minus March expanded and centered 0.47599220442933515
DSST1(australia) may-march centered 0.2736985273531448
DSST2(pacific) may-march expanded -0.39627879610891786


## CPT

In [261]:
#CP-ENSOpredictor

#DSST1
latrange1 = sst_anom.sel(lat = slice(-10, -25)).lat
weights1 = np.cos(latrange1*np.pi/180)

aprildata1 = sst_anom.sel(lat = slice(-10, -25), lon = slice(170, 200), time = (sst_anom['time.month']==4))
aprilSST1 = aprildata1.sel(time = slice('1900-04-01', '2015-04-01'))
aprilmean1 = (aprilSST1*weights1).mean({'lon', 'lat'})/weights1.mean()

maydata1 = sst_anom.sel(lat = slice(-10, -25), lon = slice(170, 200), time = (sst_anom['time.month']==5))
maySST1 = maydata1.sel(time = slice('1900-05-01', '2015-05-01'))
maymean1 = (maySST1*weights1).mean({'lon', 'lat'})/weights1.mean()

aprilmean1.time.data = maymean1.time.data
DSST1 = maymean1 - aprilmean1

#DSST2
latrange2 = sst_anom.sel(lat = slice(20, 5)).lat
weights2 = np.cos(latrange2*np.pi/180)

aprildata2 = sst_anom.sel(lat = slice(20, 5), lon = slice(180, 210), time = (sst_anom['time.month']==4))
aprilSST2 = aprildata2.sel(time = slice('1900-04-01', '2015-04-01'))
aprilmean2 = (aprilSST2*weights2).mean({'lon', 'lat'})/weights2.mean()
                          
maydata2 = sst_anom.sel(lat = slice(20, 5), lon = slice(180, 210), time = (sst_anom['time.month']==5))
maySST2 = maydata2.sel(time = slice('1900-05-01', '2015-05-01'))
maymean2 = (maySST2*weights2).mean({'lon', 'lat'})/weights2.mean()
                          
aprilmean2.time.data = maymean2.time.data
DSST2 = maymean2 - aprilmean2

CPToriginal = DSST1 - DSST2
CPTstd = CPToriginal.std()
CPTmean = CPToriginal.mean()
CPToriginal = (CPToriginal - CPTmean)/CPTstd

In [262]:
#(55-5s, 175e-155w)
latrange1 = sst_anom.sel(lat = slice(-5, -55)).lat
weights1 = np.cos(latrange1*np.pi/180)

aprildata1 = sst_anom.sel(lat = slice(-5, -55), lon = slice(175, 205), time = (sst_anom['time.month']==4))
aprilSST1 = aprildata1.sel(time = slice('1900-04-01', '2015-04-01'))
aprilmean1 = (aprilSST1*weights1).mean({'lon', 'lat'})/weights1.mean()

maydata1 = sst_anom.sel(lat = slice(-5, -55), lon = slice(175, 205), time = (sst_anom['time.month']==5))
maySST1 = maydata1.sel(time = slice('1900-05-01', '2015-05-01'))
maymean1 = (maySST1*weights1).mean({'lon', 'lat'})/weights1.mean()

aprilmean1.time.data = maymean1.time.data
DSST1new = maymean1 - aprilmean1

CPT = DSST1new
CPTstd = CPT.std()
CPTmean = CPT.mean()
CPT = (CPT - CPTmean)/CPTstd

In [263]:
cptoriginal_subtraction = correlation(CPToriginal, precipa_jjas, dims='time')
print("CPT Original", cptoriginal_subtraction.data)
cpt_southpacific = correlation(DSST1, precipa_jjas, dims='time')
print("DSST1", cpt_southpacific.data)
cpt_northpacific = correlation(DSST2, precipa_jjas, dims='time')
print("DSST2", cpt_northpacific.data)

print()
cptnew = correlation(CPT, precipa_jjas, dims='time')
print("CPT New", cptnew.data)
cpt_pacific = correlation(DSST1new, precipa_jjas, dims='time')
print("DSST", cpt_pacific.data)

CPT Original 0.3021634264413822
DSST1 0.33710169311785176
DSST2 -0.06835847735950427

CPT New 0.4316652279987759
DSST 0.43166521784841083


# PSH 1 (atlantic)

In [264]:
#mega-ENSOpredictor

#NPcalculation
latrange1 = slp_anom.sel(lat = slice(-40, -10)).lat
weights1 = np.cos(latrange1*np.pi/180)

marchdata1 = slp_anom.sel(lat = slice(-40, -10), lon = slice(200, 270), time = (slp_anom['time.month']==3))
marchSLP1 = marchdata1.sel(time=slice('1900-03-01','2015-03-01'))
marchmean1 = ((marchSLP1*weights1).mean({'lon', 'lat'})/weights1.mean())

aprildata1 = slp_anom.sel(lat = slice(-40, -10), lon = slice(200, 270), time = (slp_anom['time.month']==4))
aprilSLP1 = aprildata1.sel(time=slice('1900-04-01','2015-04-01'))
aprilmean1 = ((aprilSLP1*weights1).mean({'lon', 'lat'})/weights1.mean())

maydata1 = slp_anom.sel(lat = slice(-40, -10), lon = slice(200, 270), time = (slp_anom['time.month']==5))
maySLP1 = maydata1.sel(time = slice('1900-05-01', '2015-05-01'))
maymean1 = ((maySLP1*weights1).mean({'lon', 'lat'})/weights1.mean())

marchmean1.time.data = maymean1.time.data
aprilmean1.time.data = maymean1.time.data
NP = ((aprilmean1 + maymean1)/2)

#SPcalculation
latrange2 = slp_anom.sel(lat = slice(10, 30)).lat
weights2 = np.cos(latrange2*np.pi/180)

marchdata2 = slp_anom.sel(lat = slice(10, 30), lon = slice(180, 230), time = (slp_anom['time.month']==3))
marchSLP2 = marchdata2.sel(time = slice('1900-03-01', '2015-03-01'))
marchmean2 = ((marchSLP2*weights2).mean({'lon', 'lat'})/weights2.mean())

aprildata2 = slp_anom.sel(lat = slice(10, 30), lon = slice(180, 230), time = (slp_anom['time.month']==4))
aprilSLP2 = aprildata2.sel(time = slice('1900-04-01', '2015-04-01'))
aprilmean2 = ((aprilSLP2*weights2).mean({'lon', 'lat'})/weights2.mean())

maydata2 = slp_anom.sel(lat = slice(10, 30), lon = slice(180, 230), time = (slp_anom['time.month']==5))
maySLP2 = maydata2.sel(time = slice('1900-05-01', '2015-05-01'))
maymean2 = ((maySLP2*weights2).mean({'lon', 'lat'})/weights2.mean())

marchmean2.time.data = maymean2.time.data
aprilmean2.time.data = maymean2.time.data
SP = ((aprilmean2 + maymean2)/2)

NParea = (30*weights1.mean() * 70)
SParea = (20*weights2.mean() * 50)
PSHoriginal = ((NP * NParea) + (SP * SParea))/(NParea + SParea)
PSHstd = PSHoriginal.std()
PSHmean = PSHoriginal.mean()
PSHoriginal = (PSHoriginal - PSHmean)/PSHstd

In [265]:
#(20-45s, 145-80w)
#(15-35n, 170-125w)

#mega-ENSOpredictor
# shifted boxes

#NPcalculation
latrange1 = slp_anom.sel(lat = slice(-45, -20)).lat
weights1 = np.cos(latrange1*np.pi/180)

marchdata1 = slp_anom.sel(lat = slice(-45, -20), lon = slice(215, 260), time = (slp_anom['time.month']==3))
marchSLP1 = marchdata1.sel(time=slice('1900-03-01','2015-03-01'))
marchmean1 = ((marchSLP1*weights1).mean({'lon', 'lat'})/weights1.mean())

aprildata1 = slp_anom.sel(lat = slice(-45, -20), lon = slice(215, 260), time = (slp_anom['time.month']==4))
aprilSLP1 = aprildata1.sel(time=slice('1900-04-01','2015-04-01'))
aprilmean1 = ((aprilSLP1*weights1).mean({'lon', 'lat'})/weights1.mean())

maydata1 = slp_anom.sel(lat = slice(-45, -20), lon = slice(215, 260), time = (slp_anom['time.month']==5))
maySLP1 = maydata1.sel(time = slice('1900-05-01', '2015-05-01'))
maymean1 = ((maySLP1*weights1).mean({'lon', 'lat'})/weights1.mean())

marchmean1.time.data = maymean1.time.data
aprilmean1.time.data = maymean1.time.data
NPshifted = ((marchmean1 + aprilmean1 + maymean1)/3)

#SPcalculation
latrange2 = slp_anom.sel(lat = slice(15, 35)).lat
weights2 = np.cos(latrange2*np.pi/180)

marchdata2 = slp_anom.sel(lat = slice(15, 35), lon = slice(190, 235), time = (slp_anom['time.month']==3))
marchSLP2 = marchdata2.sel(time = slice('1900-03-01', '2015-03-01'))
marchmean2 = ((marchSLP2*weights2).mean({'lon', 'lat'})/weights2.mean())

aprildata2 = slp_anom.sel(lat = slice(15, 35), lon = slice(190, 235), time = (slp_anom['time.month']==4))
aprilSLP2 = aprildata2.sel(time = slice('1900-04-01', '2015-04-01'))
aprilmean2 = ((aprilSLP2*weights2).mean({'lon', 'lat'})/weights2.mean())

maydata2 = slp_anom.sel(lat = slice(15, 35), lon = slice(190, 235), time = (slp_anom['time.month']==5))
maySLP2 = maydata2.sel(time = slice('1900-05-01', '2015-05-01'))
maymean2 = ((maySLP2*weights2).mean({'lon', 'lat'})/weights2.mean())

marchmean2.time.data = maymean2.time.data
aprilmean2.time.data = maymean2.time.data
SPshifted = ((marchmean2 + aprilmean2 + maymean2)/3)

NPareashifted = (25*weights1.mean() * 45)
SPareashifted = (20*weights2.mean() * 45)
PSHshiftedmam = ((NPshifted * NParea) + (SPshifted * SParea))/(NParea + SParea)
PSHstd = PSHshiftedmam.std()
PSHmean = PSHshiftedmam.mean()
PSHshiftedmam = (PSHshiftedmam - PSHmean)/PSHstd

In [266]:
pshmam_subtraction = correlation(PSHoriginal, precipa_jjas, dims='time')
print("PSH MAM", pshmam_subtraction.data)
pshmam_northpacific = correlation(NP, precipa_jjas, dims='time')
print("NP MAM", pshmam_northpacific.data)
pshmam_southpacific = correlation(SP, precipa_jjas, dims='time')
print("SP MAM", pshmam_southpacific.data)

print()
pshshiftedmam_subtraction = correlation(PSHshiftedmam, precipa_jjas, dims='time')
print("PSH Shifted MAM", pshshiftedmam_subtraction.data)
pshshiftedmam_northpacific = correlation(NPshifted, precipa_jjas, dims='time')
print("NP Shifted MAM", pshshiftedmam_northpacific.data)
pshshiftedmam_southpacific = correlation(SPshifted, precipa_jjas, dims='time')
print("SP ShiftedMAM", pshshiftedmam_southpacific.data)

PSH MAM 0.24828237180324592
NP MAM 0.21280531760667154
SP MAM 0.21882285167304077

PSH Shifted MAM 0.33212898497528126
NP Shifted MAM 0.2914889197868212
SP ShiftedMAM 0.27130302504067877


# PSH 2 (indian ocean) - new

In [267]:
#(0-20n, 40-120e)

latrange1 = slp_anom.sel(lat = slice(0, 20)).lat
weights1 = np.cos(latrange1*np.pi/180)

marchdata1 = slp_anom.sel(lat = slice(0, 20), lon = slice(40, 120), time = (slp_anom['time.month']==3))
marchSLP1 = marchdata1.sel(time = slice('1900-03-01', '2015-03-01'))
marchmean1 = (marchSLP1*weights1).mean({'lon', 'lat'})/weights1.mean()

aprildata1 = slp_anom.sel(lat = slice(0, 20), lon = slice(40, 120), time = (slp_anom['time.month']==4))
aprilSLP1 = aprildata1.sel(time = slice('1900-04-01', '2015-04-01'))
aprilmean1 = (aprilSLP1*weights1).mean({'lon', 'lat'})/weights1.mean()

maydata1 = slp_anom.sel(lat = slice(0, 20), lon = slice(40, 120), time = (slp_anom['time.month']==5))
maySLP1 = maydata1.sel(time = slice('1900-05-01', '2015-05-01'))
maymean1 = (maySLP1*weights1).mean({'lon', 'lat'})/weights1.mean()

marchmean1.time.data = maymean1.time.data
aprilmean1.time.data = maymean1.time.data

EQindia = (marchmean1 + aprilmean1 + maymean1)/3

PSHindia = EQindia
PSHstd = PSHindia.std()
PSHmean = PSHindia.mean()

PSHindia = (PSHindia - PSHmean)/PSHstd

In [268]:
pshnew_india = correlation(PSHindia, precipa_jjas, dims='time')
print("PSH Equatorial Indian Ocean", pshnew_india.data)

PSH Equatorial Indian Ocean -0.3177957291633576


## PSH New 
(original boxes minus equitorial indian ocean)

In [269]:
PSHtemp = ((NP * NParea) + (SP * SParea))/(NParea + SParea)
PSHnew = PSHtemp - EQindia

PSHnewstd = PSHnew.std()
PSHnewmean = PSHnew.mean()
PSHnew = (PSHnew - PSHnewmean)/PSHnewstd

In [270]:
PSHtempshifted = ((NPshifted * NPareashifted) + (SPshifted * SPareashifted))/(NPareashifted + SPareashifted)
PSHnewshifted = PSHtempshifted - EQindia

PSHnewstd = PSHnewshifted.std()
PSHnewmean = PSHnewshifted.mean()
PSHnewshifted = (PSHnewshifted - PSHnewmean)/PSHnewstd

In [271]:
pshnew = correlation(PSHnew, precipa_jjas, dims='time')
print("PSH Original Boxes minues Equitorial Indian Ocean", pshnew.data)
psh_original = correlation(PSHtemp, precipa_jjas, dims='time')
print("PSH Original Boxes", psh_original.data)
psh_eqindia = correlation(EQindia, precipa_jjas, dims='time')
print("EQ india", psh_eqindia.data)

print()
pshnewshifted = correlation(PSHnewshifted, precipa_jjas, dims='time')
print("PSH Shifted Boxes MAM minues Equitorial Indian Ocean", pshnewshifted.data)
psh_shiftedMAM = correlation(PSHtempshifted, precipa_jjas, dims='time')
print("PSH Shifted Boxes MAM", psh_shiftedMAM.data)
psh_eqindia = correlation(EQindia, precipa_jjas, dims='time')
print("EQ india", psh_eqindia.data)

PSH Original Boxes minues Equitorial Indian Ocean 0.3525487910815343
PSH Original Boxes 0.24828237180324583
EQ india -0.31779573783300546

PSH Shifted Boxes MAM minues Equitorial Indian Ocean 0.3855575689799471
PSH Shifted Boxes MAM 0.34284635449145295
EQ india -0.31779573783300546


## NAT (removed)

In [272]:
#AnomalousAsianLowpredictor

latrange = slp_anom.sel(lat = slice(40, 60)).lat
weights = np.cos(latrange*np.pi/180)

marchdata = slp_anom.sel(lat = slice(40, 60), lon = slice(95, 125), time = (slp_anom['time.month']==3))
marchSLP = marchdata.sel(time = slice('1900-03-01', '2015-03-01'))
marchmean = ((marchSLP*weights).mean({'lon', 'lat'})/weights.mean())

maydata = slp_anom.sel(lat = slice(40, 60), lon = slice(95, 125), time = (slp_anom['time.month']==5))
maySLP = maydata.sel(time = slice('1900-05-01', '2015-05-01'))
maymean = ((maySLP*weights).mean({'lon', 'lat'})/weights.mean())

marchmean.time.data = maymean.time.data

NATprimary = maymean - marchmean
NAT = maymean - marchmean
#print(NAT)
NATstd = NAT.std()
NATmean = NAT.mean()
NAT = (NAT - NATmean)/NATstd

nat = correlation(NAT, precipa_jjas, dims='time')
print("NAT", nat.data)

NAT -0.1763771450127206


## MSE

In [273]:
#(0-25n, 45-70e)
#(20-40n, 70-90e)

latrange1 = mse_anom.sel(lat = slice(0, 25)).lat
weights1 = np.cos(latrange1*np.pi/180)

marchdata1 = mse_anom.sel(lat = slice(0, 25), lon = slice(45, 70), time = (mse_anom['time.month']==3))
marchMSE1 = marchdata1.sel(time = slice('1900-03-01', '2015-03-01'))
marchmean1 = (marchMSE1*weights1).mean({'lon', 'lat'})/weights1.mean()

aprildata1 = mse_anom.sel(lat = slice(0, 25), lon = slice(45, 70), time = (mse_anom['time.month']==4))
aprilMSE1 = aprildata1.sel(time = slice('1900-04-01', '2015-04-01'))
aprilmean1 = (aprilMSE1*weights1).mean({'lon', 'lat'})/weights1.mean()

maydata1 = mse_anom.sel(lat = slice(0, 25), lon = slice(45, 70), time = (mse_anom['time.month']==5))
mayMSE1 = maydata1.sel(time = slice('1900-05-01', '2015-05-01'))
maymean1 = (mayMSE1*weights1).mean({'lon', 'lat'})/weights1.mean()

marchmean1.time.data = maymean1.time.data
aprilmean1.time.data = maymean1.time.data

MSEleft = maymean1 - marchmean1

latrange2 = mse_anom.sel(lat = slice(20, 40)).lat
weights2 = np.cos(latrange2*np.pi/180)

marchdata2 = mse_anom.sel(lat = slice(20, 40), lon = slice(70, 90), time = (mse_anom['time.month']==3))
marchMSE2 = marchdata2.sel(time = slice('1900-03-01', '2015-03-01'))
marchmean2 = ((marchMSE2*weights2).mean({'lon', 'lat'})/weights2.mean())

aprildata2 = mse_anom.sel(lat = slice(20, 40), lon = slice(70, 90), time = (mse_anom['time.month']==4))
aprilMSE2 = aprildata2.sel(time = slice('1900-04-01', '2015-04-01'))
aprilmean2 = ((aprilMSE2*weights2).mean({'lon', 'lat'})/weights2.mean())

maydata2 = mse_anom.sel(lat = slice(20, 40), lon = slice(70, 90), time = (mse_anom['time.month']==5))
mayMSE2 = maydata2.sel(time = slice('1900-05-01', '2015-05-01'))
maymean2 = ((mayMSE2*weights2).mean({'lon', 'lat'})/weights2.mean())

marchmean2.time.data = maymean2.time.data
aprilmean2.time.data = maymean2.time.data

MSEright = maymean2 - marchmean2

MSEmay_mar = (MSEright + MSEleft)/2
MSEstd = MSEmay_mar.std()
MSEmean = MSEmay_mar.mean()
MSEmay_mar = (MSEmay_mar - MSEmean)/MSEstd

In [274]:
#(45-70n, 95e-150w)

latrange = mse_anom.sel(lat = slice(45, 70)).lat
weights1 = np.cos(latrange*np.pi/180)

marchdata1 = mse_anom.sel(lat = slice(45, 70), lon = slice(95, 210), time = (mse_anom['time.month']==3))
marchMSE1 = marchdata1.sel(time = slice('1900-03-01', '2015-03-01'))
marchmean1 = (marchMSE1*weights1).mean({'lon', 'lat'})/weights1.mean()

maydata1 = mse_anom.sel(lat = slice(45, 70), lon = slice(95, 210), time = (mse_anom['time.month']==5))
mayMSE1 = maydata1.sel(time = slice('1900-05-01', '2015-05-01'))
maymean1 = (mayMSE1*weights1).mean({'lon', 'lat'})/weights1.mean()

marchmean1.time.data = maymean1.time.data

MSErussiainitial = maymean1 - marchmean1

MSErussia = MSErussiainitial
MSEstd = MSErussia.std()
MSEmean = MSErussia.mean()
MSErussia = (MSErussia - MSEmean)/MSEstd
print(MSErussia)

<xarray.DataArray (time: 88)>
array([ 0.703373, -0.420024, -0.735947, -0.128984,  0.070085, -0.557442,
        1.453159,  1.853357,  0.762672, -1.040786, -1.595467,  0.254367,
       -0.080102,  0.322635, -0.798798,  0.156482, -0.555605, -1.770474,
        0.466213,  0.484568, -0.360855,  1.193541,  0.649579, -1.334648,
       -2.218624,  0.348202, -0.561232,  0.678378,  0.992252, -0.525963,
       -0.981872,  0.68941 , -0.255552, -0.942193, -0.521181,  1.832442,
        0.982721,  0.628098, -1.017553,  0.41852 , -0.521885, -0.210203,
       -0.885643,  0.988456,  1.393629, -0.243851,  0.252325, -1.782069,
        0.671555, -1.918719,  0.53454 ,  1.117873,  0.796476,  0.325502,
        0.145092, -0.622136,  0.608512, -0.988359,  0.456163,  0.555907,
       -0.074356,  1.502024,  0.995766, -0.017734, -0.579755,  0.883712,
       -1.977925, -1.521291,  0.927713, -1.087153,  0.939036, -0.160292,
       -0.10923 , -0.21564 , -1.361189,  0.09094 ,  0.424206, -1.227248,
       -0.059124, -0.

In [275]:
msemay_mar = correlation(MSEmay_mar, precipa_jjas, dims='time')
print("MSE May - March 2 boxes", msemay_mar.data)
mseeast = correlation(MSEright, precipa_jjas, dims='time')
print("MSE Right", mseeast.data)
msewest = correlation(MSEleft, precipa_jjas, dims='time')
print("MSE Left", msewest.data)
print()
mserussia = correlation(MSErussia, precipa_jjas, dims='time')
print("MSE may - march Russia", mserussia.data)

msecorrmay_mar2boxesrussia = correlation(MSEmay_mar, MSErussia, dims='time')
print("Correlation between MSE May-March 2 boxes and Russia", msecorrmay_mar2boxesrussia.data)

MSE May - March 2 boxes 0.2542827284894997
MSE Right 0.22666060063016982
MSE Left 0.2565303134495685

MSE may - march Russia 0.21993838702674287
Correlation between MSE May-March 2 boxes and Russia 0.11566555826864745


In [276]:
MSEmay_marmean = (MSEright + MSEleft + MSErussiainitial)/3
MSEstd = MSEmay_marmean.std()
MSEmean = MSEmay_marmean.mean()
MSEmay_marmean = (MSEmay_marmean - MSEmean)/MSEstd

msemay_mar3boxes = correlation(MSEmay_marmean, precipa_jjas, dims='time')
print("All 3 boxes in May-March mean", msemay_mar3boxes.data)

All 3 boxes in May-March mean 0.30656883629138765


In [277]:
# Copyright (c) 2020 Medha Palavalli.