# 02_define_extremes.ipynb
### Standardize anomalies and define extremes

This notebook loads the data from [01_ENSO_models.ipynb](01_ENSO_models.ipynb) and detects extreme values. Detrended anomalies are standardized by the monthly standard deviation, and extreme values are identified using the 90th percentile at each spatial point.


![preprocess_flow](images/02_define_extremes.png)


In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

#### Import data
- preprocessed monthly averaged and detrended OISST v2.1 data with Nino indices regressed out.


In [2]:
# All inputs live in one directory: the three Nino-regressed anomaly files
# plus the preprocessed OISST v2.1 file.
data_dir = '/burg/abernathey/users/hillary/oisst_v21/'
file_names = (
    'ssta_notrend_noNino3.nc',
    'ssta_notrend_noNino3_4.nc',
    'ssta_notrend_noNino4.nc',
    'preprocessed_OISSTv21.nc',
)
file_path_list = tuple(data_dir + name for name in file_names)

# Open every file as a single combined dataset and display its summary.
ds = xr.open_mfdataset(file_path_list)
ds

Unnamed: 0,Array,Chunk
Bytes,1.87 kiB,1.87 kiB
Shape,"(478,)","(478,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.87 kiB 1.87 kiB Shape (478,) (478,) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",478  1,

Unnamed: 0,Array,Chunk
Bytes,1.87 kiB,1.87 kiB
Shape,"(478,)","(478,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.87 kiB,1.87 kiB
Shape,"(478,)","(478,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.87 kiB 1.87 kiB Shape (478,) (478,) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",478  1,

Unnamed: 0,Array,Chunk
Bytes,1.87 kiB,1.87 kiB
Shape,"(478,)","(478,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.87 kiB,1.87 kiB
Shape,"(478,)","(478,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.87 kiB 1.87 kiB Shape (478,) (478,) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",478  1,

Unnamed: 0,Array,Chunk
Bytes,1.87 kiB,1.87 kiB
Shape,"(478,)","(478,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.96 MiB,3.96 MiB
Shape,"(720, 1440)","(720, 1440)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.96 MiB 3.96 MiB Shape (720, 1440) (720, 1440) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",1440  720,

Unnamed: 0,Array,Chunk
Bytes,3.96 MiB,3.96 MiB
Shape,"(720, 1440)","(720, 1440)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray


#### Standardize SST anomalies by dividing by the monthly standard deviation
This places equal variance on SSTa at all spatial points

In [3]:
# Calendar month of every time step; used as the grouping key below.
month = ds.time.dt.month

# Standard deviation of each field within each calendar month.
stdev_notrend = ds.ssta_notrend.groupby(month).std()
stdev_trend = ds.ssta_trend.groupby(month).std()

# Divide every time step by the standard deviation of its own calendar month.
ssta_stn_notrend = ds.ssta_notrend.groupby(month) / stdev_notrend
ssta_stn_trend = ds.ssta_trend.groupby(month) / stdev_trend

# Note: the Nino-regressed fields (noNino3, noNino3_4, noNino4) are not
# standardized in this cell.


#### Calculate the 90th percentile threshold from the original (standardized) detrended SST anomalies

In [4]:
# Quantile level that defines an extreme (0.9 = 90th percentile).
thresh = 0.9

# The quantile is taken along time, so when the data are chunked, put the
# whole time axis into a single chunk first.
if ssta_stn_notrend.chunks:
    single_time_chunk = {'time': -1}
    ssta_stn_notrend = ssta_stn_notrend.chunk(single_time_chunk)
    ssta_stn_trend = ssta_stn_trend.chunk(single_time_chunk)

# Threshold at every grid point: the quantile over the full time axis
# (all months pooled, not one threshold per calendar month).
quantile_kwargs = dict(dim='time', keep_attrs=True, skipna=True)
prct_notrend = ssta_stn_notrend.quantile(thresh, **quantile_kwargs)
prct_trend = ssta_stn_trend.quantile(thresh, **quantile_kwargs)



In [5]:
# Show the threshold computed from ssta_stn_notrend (time has been reduced away,
# leaving one value per lat/lon point).
prct_notrend

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(720, 1440)","(720, 1440)"
Count,595 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 7.91 MiB 7.91 MiB Shape (720, 1440) (720, 1440) Count 595 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720,

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(720, 1440)","(720, 1440)"
Count,595 Tasks,1 Chunks
Type,float64,numpy.ndarray


#### Find where SSTA exceeds the 90th percentile threshold

In [6]:
# Keep only the anomalies that exceed the 90th-percentile threshold; every
# other value becomes NaN (the default fill of `.where`).
#
# `prct_trend` and `prct_notrend` are quantiles of the *standardized*
# anomalies, so they have to be compared against standardized anomalies too.
# Comparing them against the unstandardized fields in `ds` mixes units, and the
# exceedance rate is then no longer tied to the 90th percentile.
mhw_ssta_trend = ssta_stn_trend.where(ssta_stn_trend > prct_trend)
mhw_ssta_notrend = ssta_stn_notrend.where(ssta_stn_notrend > prct_notrend)

# The Nino-regressed fields are tested against the threshold of the original
# detrended anomalies, so they are scaled by the same monthly standard
# deviation to share its units.
# NOTE(review): confirm that scaling by `stdev_notrend` (rather than each
# field's own monthly standard deviation) is the intended choice.
by_month = ds.time.dt.month
ssta_stn_noNino3 = ds.ssta_notrend_noNino3.groupby(by_month) / stdev_notrend
ssta_stn_noNino3_4 = ds.ssta_notrend_noNino3_4.groupby(by_month) / stdev_notrend
ssta_stn_noNino4 = ds.ssta_notrend_noNino4.groupby(by_month) / stdev_notrend

mhw_ssta_notrend_noNino3 = ssta_stn_noNino3.where(ssta_stn_noNino3 > prct_notrend)
mhw_ssta_notrend_noNino3_4 = ssta_stn_noNino3_4.where(ssta_stn_noNino3_4 > prct_notrend)
mhw_ssta_notrend_noNino4 = ssta_stn_noNino4.where(ssta_stn_noNino4 > prct_notrend)


#### Quick sanity check

In [None]:
# No Trend
# Left panel: the full ssta_notrend field for one month.
# Right panel: only the values kept as extremes for the same month.
snapshot = '2016-12-01'
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(20, 8))
ds.ssta_notrend.sel(time=snapshot).plot(ax=ax1)
mhw_ssta_notrend.sel(time=snapshot).plot(ax=ax2);


In [None]:
# Trend
# Left panel: the full ssta_trend field for one month.
# Right panel: only the values kept as extremes for the same month.
snapshot = '2016-12-01'
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(20, 8))
ds.ssta_trend.sel(time=snapshot).plot(ax=ax1)
mhw_ssta_trend.sel(time=snapshot).plot(ax=ax2);


In [None]:
# Time series of the unweighted lat/lon mean (no area weighting) of the
# anomalies with and without the trend. Both lines share one axes, so label
# them and add a legend to tell them apart.
fig, ax = plt.subplots()
ds.ssta_trend.mean(('lat', 'lon')).plot(ax=ax, label='ssta_trend')
ds.ssta_notrend.mean(('lat', 'lon')).plot(ax=ax, label='ssta_notrend')
ax.set_title('Unweighted spatial mean: ssta_trend vs. ssta_notrend')
ax.legend();


In [None]:
# Time series of the unweighted lat/lon mean (no area weighting) of the
# detrended anomalies with and without the Nino3 regression removed. Both
# lines share one axes, so label them and add a legend to tell them apart.
fig, ax = plt.subplots()
ds.ssta_notrend_noNino3.mean(('lat', 'lon')).plot(ax=ax, label='ssta_notrend_noNino3')
ds.ssta_notrend.mean(('lat', 'lon')).plot(ax=ax, label='ssta_notrend')
ax.set_title('Unweighted spatial mean: ssta_notrend_noNino3 vs. ssta_notrend')
ax.legend();


In [None]:
# Time series of the unweighted lat/lon mean (no area weighting) of the
# values kept as extremes, for the detrended and the trend-retaining fields.
# Both lines share one axes, so label them and add a legend to tell them apart.
fig, ax = plt.subplots()
mhw_ssta_notrend.mean(('lat', 'lon')).plot(ax=ax, label='mhw_ssta_notrend')
mhw_ssta_trend.mean(('lat', 'lon')).plot(ax=ax, label='mhw_ssta_trend')
ax.set_title('Unweighted spatial mean of extremes: notrend vs. trend')
ax.legend();


### Save output 

In [7]:
# Directory the output file is written to.
path = '/burg/abernathey/users/hillary/oisst_v21/'

# Dimension layouts shared by the output variables.
dims_3d = ('time', 'lat', 'lon')
dims_2d = ('lat', 'lon')

# Output variables: standardized anomalies, their thresholds, and the
# above-threshold fields. Only the underlying arrays (`.data`) are passed,
# so coordinates are supplied separately below.
output_vars = {
    'ssta_stn_notrend': (dims_3d, ssta_stn_notrend.data),
    'ssta_stn_trend': (dims_3d, ssta_stn_trend.data),
    'prct_notrend': (dims_2d, prct_notrend.data),
    'prct_trend': (dims_2d, prct_trend.data),
    'mhw_ssta_trend': (dims_3d, mhw_ssta_trend.data),
    'mhw_ssta_notrend': (dims_3d, mhw_ssta_notrend.data),
    'mhw_ssta_notrend_noNino3': (dims_3d, mhw_ssta_notrend_noNino3.data),
    'mhw_ssta_notrend_noNino3_4': (dims_3d, mhw_ssta_notrend_noNino3_4.data),
    'mhw_ssta_notrend_noNino4': (dims_3d, mhw_ssta_notrend_noNino4.data),
}

# Coordinates are copied from the input dataset.
output_coords = {
    'time': (['time'], ds.time.data),
    'lat': (['lat'], ds.lat.data),
    'lon': (['lon'], ds.lon.data),
}

# Global attributes, including the quantile level used for the threshold.
output_attrs = {
    'description': "standardized SST anomalies above positive extreme threshold",
    'threshold': thresh,
    'data': 'preprocessed monthly averaged OISST v2.1',
}

ds_new = xr.Dataset(data_vars=output_vars, coords=output_coords, attrs=output_attrs)

# mode='w' overwrites any existing file at this location.
output_file = path + 'positive_extremes_OISSTv21.nc'
ds_new.to_netcdf(output_file, mode='w')

In [8]:
# Presumably a completion marker: prints once execution reaches the end of the notebook.
print('done')

done
