This notebook runs Ocetrac on a subset of the MUR sea surface temperature data.

In [1]:
import shutil

import fsspec
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import dask
from dask.distributed import Client
# from zarr.errors import ContainsGroupError
# from xmhw import xmhw
import marineHeatWaves as mhw

# Set up Cluster

In [2]:
# Set dask's temporary directory.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
dask.config.set({'temporary_directory': '/data/pacific/rwegener/'})

<dask.config.set at 0x7f28b045cee0>

In [3]:
# Start a dask.distributed Client with memory_limit='216GB' and print the
# URL of its dashboard so it can be opened in a browser.
client = Client(memory_limit='216GB')
print(client.dashboard_link)

http://127.0.0.1:8787/status


# Load Data

## MUR

In [2]:
# Block: LOAD ZARR (no task activity)
# MUR SST Zarr store on S3, mapped with anonymous access.
file_location = 's3://mur-sst/zarr'
ikey = fsspec.get_mapper(file_location, anon=True)

# Open the store using consolidated metadata and keep only the SST variable.
mur_full = xr.open_zarr(store=ikey, consolidated=True)
mur = mur_full.analysed_sst

In [3]:
# Display the DataArray summary (size, shape, chunking, dtype) of analysed_sst.
mur

Unnamed: 0,Array,Chunk
Bytes,15.19 TiB,245.78 MiB
Shape,"(6443, 17999, 36000)","(6443, 100, 100)"
Count,64801 Tasks,64800 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 15.19 TiB 245.78 MiB Shape (6443, 17999, 36000) (6443, 100, 100) Count 64801 Tasks 64800 Chunks Type float32 numpy.ndarray",36000  17999  6443,

Unnamed: 0,Array,Chunk
Bytes,15.19 TiB,245.78 MiB
Shape,"(6443, 17999, 36000)","(6443, 100, 100)"
Count,64801 Tasks,64800 Chunks
Type,float32,numpy.ndarray


In [9]:
# Spot-check one grid cell at the first time step (positional indices).
mur.isel({'time': 0, 'lat': 20, 'lon': -20}).values

array(265.382, dtype=float32)

# Subset

In [3]:
# Block: SUBSET
# Small lat/lon window that spans 4 chunks (~110 MB in total).
mur_subset = mur.sel({'lat': slice(32, 32.5), 'lon': slice(121.4, 122.2)})

In [5]:
# Display the subset's summary to confirm its shape and chunk layout.
mur_subset

Unnamed: 0,Array,Chunk
Bytes,101.53 MiB,74.96 MiB
Shape,"(6443, 51, 81)","(6443, 50, 61)"
Count,64805 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 101.53 MiB 74.96 MiB Shape (6443, 51, 81) (6443, 50, 61) Count 64805 Tasks 4 Chunks Type float32 numpy.ndarray",81  51  6443,

Unnamed: 0,Array,Chunk
Bytes,101.53 MiB,74.96 MiB
Shape,"(6443, 51, 81)","(6443, 50, 61)"
Count,64805 Tasks,4 Chunks
Type,float32,numpy.ndarray


### Exploration

In [None]:
# Maximum and minimum SST over the whole subset.
# A notebook cell only displays its last expression, so both results are
# returned together as a (max, min) tuple instead of discarding the max.
(
    mur_subset.max().compute(),  # 304.75
    mur_subset.min().compute(),  # 265.382
)

In [None]:
%%time
# Quick-look map of the subset at the first time step; the cell is timed
# because plotting triggers the data read.
mur_subset.isel(time=0).plot()

In [None]:
# oisst_subset = oisst_subset.chunk({'lat': 25, 'lon': 25, 'time': 2})
# ^ hot_water calc freaks out "dimension time ... consists of multiple chunks, but is 
# also a core dimension" if you rechunk like this

# Preprocess (xmhw)

In [9]:
from xmhw.xmhw import threshold, detect

In [None]:
# Run xmhw's `threshold` on the subset and keep the result in `clim`.
# NOTE(review): the structure of the return value is not shown in this
# notebook — check the xmhw docs. `clim` is rebound later by mhw.detect.
clim = threshold(mur_subset)

# Preprocess (`xmhw` manual)

In [22]:
# xmhw/calc_clim()
# Centred rolling window of 5 steps along time; `.construct('wdim')` is
# deliberately not applied yet.
rolled = mur_subset.rolling({'time': 5}, center=True)
rolled

DataArrayRolling [time->5(center)]

In [24]:
# Re-display the rolling-window object created above.
rolled

DataArrayRolling [time->5(center)]

# Preprocess (EJO `marineHeatwaves`)

In [5]:
from datetime import datetime 

In [6]:
# Time coordinate of the subset.
t = mur_subset['time']

In [7]:
# Format time values
# Convert the datetime64 time coordinate to Python datetimes directly.
# Casting to microsecond resolution makes `.tolist()` return
# `datetime.datetime` objects, so no string round trip is needed and
# timestamps with non-zero sub-second parts no longer raise.
mur_t_dt = t.values.astype('datetime64[us]').tolist()

# Proleptic Gregorian ordinals (one integer per day), as passed to mhw.detect.
mur_t_dt_ordinal = np.array([stamp.toordinal() for stamp in mur_t_dt])

In [8]:
# Extract sst as a numpy array
sst_np = mur_subset.values

In [11]:
%%time
for x in range(10):
    for y in range(10):
        mhws, clim = mhw.detect(mur_t_dt_ordinal, sst_np[:, x, y])

CPU times: user 11.5 s, sys: 0 ns, total: 11.5 s
Wall time: 11.5 s


In [10]:
# Inspect the 'seas' entry of `clim` from the last mhw.detect call
# (presumably the seasonal climatology — see the marineHeatWaves docs).
clim['seas']

array([292.28060519, 292.42751878, 292.5712044 , ..., 280.86566753,
       280.7797064 , 280.69747039])

# Preprocess (old)

### Climatology and Anomaly
**Climatology** follows the [Ocetrac CMIP6 example](https://ocetrac.readthedocs.io/en/latest/examples/cmip6.html): monthly mean temperatures are calculated over the full ~18-year dataset.

**Anomaly** calculates the deviation of each pixel from the monthly climatology for the selected 2018 period (the code below currently selects June 2018 only).

In [7]:
%%time
# Block: CLIMATOLOGY & ANOMALY (yes task activity)
# climatology shape time=12, lat=51, lon=81
climatology = mur_subset.groupby(mur_subset.time.dt.month).mean()

# get data from only 2018 to calculate anomaly
mur_2018_subset = mur_subset.sel(time='2018-06')
anomaly = mur_2018_subset.groupby(mur_2018_subset.time.dt.month) - climatology
anomaly = anomaly.load()

CPU times: user 1.24 s, sys: 282 ms, total: 1.52 s
Wall time: 23.3 s


In [9]:
# Inspect the groupby object to see which month groups the 2018 selection contains.
mur_2018_subset.groupby(mur_2018_subset.time.dt.month)

DataArrayGroupBy, grouped over 'month'
1 groups with labels 6.

### Calculate 90th Percentile Threshold Values

In [11]:
# Block: THRESHOLD & HOT WATER (yes task activity)
%time
percentile = 0.9
# .chunk(dict(time=-1)) fixes the 0 dim parallel error
# mur_subset = mur_subset.chunk(dict(time=-1))
# Threshold takes 90th percentile value for each month (It's like climatology but uses 90th
# instead of mean value for Combine)
threshold = mur_subset.groupby(mur_subset.time.dt.month).quantile(percentile, 
                                                                            dim='time', 
                                                                            keep_attrs=True, 
                                                                            skipna=True,
                                                                            )

# CMIP6 example has:
# hot_water = anomaly.groupby(mur_2018_subset.time.dt.month).where( ...
# But I don't think the additional groupby is necessary
# Follow up: a comparison with hw1.equals(hw2) shows they are the same
hot_water = anomaly.where(mur_2018_subset.groupby(mur_2018_subset.time.dt.month)>threshold)

hot_water = hot_water.load()

# 1 year of data (thresh + hot water calc): 1 m 37s wall time (default chunks)

CPU times: user 1 µs, sys: 1 µs, total: 2 µs
Wall time: 4.77 µs


### Visualize

In [None]:
# Visualize
# Left: anomaly at the first time step. Right: hot-water anomaly at the
# first time step. Cells without SST are overlaid in black on both panels.

# `mask` was never defined in this notebook; build it as in the Ocetrac
# CMIP6 example (1 where SST is finite, 0 where it is missing).
# NOTE(review): assumes missing SST at the first time step marks land — confirm for MUR.
mask = xr.where(np.isfinite(mur_subset.isel(time=0)), 1, 0)
land = mask.where(mask == 0)
has_land = bool((mask == 0).any())  # contourf cannot draw an all-NaN field

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 3))

anomaly.isel(time=0).plot(ax=ax1, cmap='RdBu_r', vmin=-2, vmax=2, extend='both')
hot_water.isel(time=0).plot(ax=ax2, cmap='Reds', vmin=0)

for ax in (ax1, ax2):
    if has_land:
        land.plot.contourf(ax=ax, colors='k', add_colorbar=False)
    ax.set_aspect('equal')