# 01_define_extremes.ipynb
### Standardize anomalies and define extremes

This notebook loads the data from [00_preprocessing.ipynb](00_preprocess.ipynb) (outlined in red below) and detects extremes values. Detrended anomalies are standardized by the monthly mean standard deviation and extreme values are identified using the 90th percentile at each spatial point.


![preprocess_flow](images/01_detect_extremes.png)


In [53]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

#### Import data
- preprocessed monthly averaged OISST v2.1 data created in [00_preprocessing.ipynb](00_preprocess.ipynb)


In [2]:
ds = xr.open_mfdataset('/burg/abernathey/users/hillary/oisst_v21/preprocessed_OISSTv21.nc')
ds

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.73 kiB,3.73 kiB
Shape,"(478,)","(478,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.73 kiB 3.73 kiB Shape (478,) (478,) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",478  1,

Unnamed: 0,Array,Chunk
Bytes,3.73 kiB,3.73 kiB
Shape,"(478,)","(478,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.73 kiB,3.73 kiB
Shape,"(478,)","(478,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.73 kiB 3.73 kiB Shape (478,) (478,) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",478  1,

Unnamed: 0,Array,Chunk
Bytes,3.73 kiB,3.73 kiB
Shape,"(478,)","(478,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.73 kiB,3.73 kiB
Shape,"(478,)","(478,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.73 kiB 3.73 kiB Shape (478,) (478,) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",478  1,

Unnamed: 0,Array,Chunk
Bytes,3.73 kiB,3.73 kiB
Shape,"(478,)","(478,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray


#### Standardize SST anomalies by dividing by the monthly standard deviatoin
This places equal variance on SSTa at all spatial points

In [56]:
# compute monthly standard deviation
stdev_notrend = ds.ssta_notrend.groupby(ds.time.dt.month).std()

# divide by standard deviation
ssta_stn_notrend = ds.ssta_notrend.groupby(ds.time.dt.month)/stdev_notrend


#### Calculate 90th percentile threshold from the origonal detrended SST anomalies

In [54]:
# Define extreme threshold as the 90th percentile
thresh = .9

# Rechunk time dim
if ssta_stn_notrend.chunks:
    ssta_stn_notrend = ssta_stn_notrend.chunk({'time': -1})
    
# Compute values of the monthly threshold
prct_notrend = ssta_stn_notrend.quantile(thresh, dim='time', keep_attrs=True, skipna=True)


#### Find where SSTA exceeds the 90th percentile threshold

In [57]:
# Take only the SSTa that exceeds the calculated monthly threshold
mhw_ssta_notrend = ds.ssta_notrend.where(ds.ssta_notrend>prct_notrend)
mhw_ssta_notrend_noNino3 = ds.ssta_notrend_noNino3.where(ds.ssta_notrend_noNino3 > prct_notrend)
mhw_ssta_notrend_noNino3_4 = ds.ssta_notrend_noNino3_4.where(ds.ssta_notrend_noNino3_4 > prct_notrend)
mhw_ssta_notrend_noNino4 = ds.ssta_notrend_noNino4.where(ds.ssta_notrend_noNino4 > prct_notrend)

### Save output 

In [61]:
# Specify path to save output 
path = '/burg/abernathey/users/hillary/oisst_v21/'

ds_new = xr.Dataset(
    data_vars=dict(
        ssta_stn_notrend=(['time', 'lat', 'lon'], ssta_stn_notrend.data),
        prct_notrend = (['lat', 'lon'], prct_notrend.data),
        
        mhw_ssta_notrend = (['time', 'lat', 'lon'], mhw_ssta_notrend.data),
        mhw_ssta_notrend_noNino3 = (['time', 'lat', 'lon'], mhw_ssta_notrend_noNino3.data),
        mhw_ssta_notrend_noNino3_4=(['time', 'lat', 'lon'], mhw_ssta_notrend_noNino3_4.data),
        mhw_ssta_notrend_noNino4=(['time', 'lat', 'lon'], mhw_ssta_notrend_noNino4.data),     
    ),
    coords=dict(
        time=(['time'], ds.time.data),
        lat=(['lat'], ds.lat.data),
        lon=(['lon'], ds.lon.data),
    ),
    attrs=dict(description="standardize SST anomalies and find positive extreme values", 
               threshold=thresh, 
               data='preprocessed monthly averaged OISST v2.1'),
)

ds_new.to_netcdf(path+'positive_extremes_OISSTv21.nc', mode='w')

In [62]:
ds_new

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,7.91 MiB
Shape,"(478, 720, 1440)","(1, 720, 1440)"
Count,588 Tasks,478 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 7.91 MiB Shape (478, 720, 1440) (1, 720, 1440) Count 588 Tasks 478 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,7.91 MiB
Shape,"(478, 720, 1440)","(1, 720, 1440)"
Count,588 Tasks,478 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(720, 1440)","(720, 1440)"
Count,595 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 7.91 MiB 7.91 MiB Shape (720, 1440) (720, 1440) Count 595 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720,

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(720, 1440)","(720, 1440)"
Count,595 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,598 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 598 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,598 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,600 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 600 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,600 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,600 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 600 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,600 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,600 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.69 GiB 3.69 GiB Shape (478, 720, 1440) (478, 720, 1440) Count 600 Tasks 1 Chunks Type float64 numpy.ndarray",1440  720  478,

Unnamed: 0,Array,Chunk
Bytes,3.69 GiB,3.69 GiB
Shape,"(478, 720, 1440)","(478, 720, 1440)"
Count,600 Tasks,1 Chunks
Type,float64,numpy.ndarray
