This notebook runs OceTrac on a subset of MUR data.

In [2]:
import shutil

import fsspec
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from dask.distributed import Client
from zarr.errors import ContainsGroupError
import ocetrac

# Set up Cluster

In [None]:
# Create a dask.distributed Client with default arguments; later cells note whether they
# produce task activity.
client = Client()

In [None]:
# Display the client's dashboard link as the cell output.
client.dashboard_link

# Load Data

## MUR

In [3]:
# Block: LOAD ZARR (no task activity)
# Map the MUR SST Zarr store on S3 with anonymous access, open it as a Dataset using the
# consolidated metadata, and keep only the 'analysed_sst' variable.
file_location = 's3://mur-sst/zarr'
ikey = fsspec.get_mapper(file_location, anon=True)
mur_full = xr.open_zarr(store=ikey, consolidated=True)
mur = mur_full['analysed_sst']

In [None]:
# Display the repr of the 'analysed_sst' variable selected above.
mur

# Subset

In [4]:
# Block: SUBSET
# Small lat/lon window used for the rest of the notebook
# (4 chunk subset, ~110 MB total)
lat_window = slice(32, 32.5)
lon_window = slice(121.4, 122.2)
mur_subset = mur.sel(lat=lat_window, lon=lon_window)

In [None]:
# Display the repr of the lat/lon subset.
mur_subset

### Exploration

In [None]:
# Value range of the subset. A notebook cell only displays its LAST expression, so both
# results are bound to names and returned together; previously the max was computed and
# then silently discarded.
sst_max = float(mur_subset.max().compute())  # 304.75
sst_min = float(mur_subset.min().compute())  # 265.382
sst_max, sst_min

In [None]:
%%time
# Plot the first time step of the subset; the %%time cell magic reports how long the cell takes.
mur_subset.isel(time=0).plot()

In [None]:
# NOTE: do not rechunk along time like this:
# oisst_subset = oisst_subset.chunk({'lat': 25, 'lon': 25, 'time': 2})
# With time split into multiple chunks, the hot_water calculation fails with
# "dimension time ... consists of multiple chunks, but is also a core dimension".

# Preprocess

### Climatology and Anomaly
**Climatology**: follows the [Ocetrac CMIP6 example](https://ocetrac.readthedocs.io/en/latest/examples/cmip6.html). Calculates monthly mean temperatures over the full 18-year dataset.

**Anomaly**: calculates the deviation of each pixel from the monthly climatology for June 2018 (the code selects `time='2018-06'`).

In [5]:
%%time
# Block: CLIMATOLOGY & ANOMALY (yes task activity)
# climatology shape time=12, lat=51, lon=81
climatology = mur_subset.groupby(mur_subset.time.dt.month).mean()

# get data from only 2018 to calculate anomaly
mur_2018_subset = mur_subset.sel(time='2018-06')
anomaly = mur_2018_subset.groupby(mur_2018_subset.time.dt.month) - climatology
anomaly = anomaly.load()

CPU times: user 1.94 s, sys: 995 ms, total: 2.94 s
Wall time: 8.3 s


### Create Land/Ocean Masks

**Assumption**: It seems like the Zarr store doesn't mask out land values :/ So I am treating any value at or below 270 K as land.

In [43]:
# Block: MASKS (yes task activity)
# Cells of the first selected time step at or below this value are flagged as land.
land_cutoff = 270
mur_subset_time0 = mur_2018_subset.isel(time=0)
is_land = mur_subset_time0 <= land_cutoff

# 0 where flagged as land, 1 everywhere else; load the result right away
mask = xr.where(is_land, 0, 1).load()

In [None]:
# Plot the mask built above (0 where the first time step is <= 270, 1 elsewhere).
mask.plot()

### Calculate 90th Percentile Threshold Values

In [48]:
# Block: THRESHOLD & HOT WATER (yes task activity)
%time
percentile = 0.9
# .chunk(dict(time=-1)) fixes the 0 dim parallel error
# mur_subset = mur_subset.chunk(dict(time=-1))
# Threshold takes 90th percentile value for each month (It's like climatology but uses 90th
# instead of mean value for Combine)
threshold = mur_subset.groupby(mur_subset.time.dt.month).quantile(percentile, 
                                                                            dim='time', 
                                                                            keep_attrs=True, 
                                                                            skipna=True,
                                                                            )

# CMIP6 example has:
# hot_water = anomaly.groupby(mur_2018_subset.time.dt.month).where( ...
# But I don't think the additional groupby is necessary
# Follow up: a comparison with hw1.equals(hw2) shows they are the same
hot_water = anomaly.where(mur_2018_subset.groupby(mur_2018_subset.time.dt.month)>threshold)

hot_water = hot_water.load()

# 1 year of data (thresh + hot water calc): 1 m 37s wall time (default chunks)

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 9.3 µs


### Visualize

In [None]:
# Visualize: anomaly (left) and hot water (right) for the first time step,
# with the mask's zero-valued cells drawn in black on both panels.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 3))
land_only = mask.where(mask == 0)

anomaly.isel(time=0).plot(ax=ax1, cmap='RdBu_r', vmin=-2, vmax=2, extend='both')
land_only.plot.contourf(ax=ax1, colors='k', add_colorbar=False)
ax1.set_aspect('equal')

hot_water.isel(time=0).plot(ax=ax2, cmap='Reds', vmin=0)
land_only.plot.contourf(ax=ax2, colors='k', add_colorbar=False)
ax2.set_aspect('equal');

# Run Ocetrac

In [8]:
# Rename lon/lat to the x/y dimension names passed to the tracker below.
# Only names that are still present are renamed, so re-running this cell is a no-op instead
# of raising because 'lon'/'lat' no longer exist.
_xy_renames = {old: new for old, new in {'lon': 'x', 'lat': 'y'}.items() if old in hot_water.dims}
if _xy_renames:
    hot_water = hot_water.rename(_xy_renames)

### Save anomaly and land mask

In [62]:
# Write hot_water to a netCDF file under ./data.
hot_water_path = './data/hot_water.nc'
hot_water.to_netcdf(hot_water_path)

In [64]:
# Open the file at hot_water_path and display its 'analysed_sst' variable.
xr.open_dataset(hot_water_path)['analysed_sst']

In [68]:
# Write hot_water as a Zarr store, overwriting any existing store (mode='w').
# The path is set here explicitly: when the notebook runs top to bottom, hot_water_path
# still points at the netCDF file written above, and a Zarr store must not be written there.
hot_water_path = './data/hot_water.zarr'
hot_water.to_dataset().to_zarr(hot_water_path, mode='w', consolidated=True)

<xarray.backends.zarr.ZarrStore at 0x7fb4883717b0>

In [67]:
# Write hot_water to a Zarr store, replacing any store already at this path.
# mode='w' overwrites in place, so consolidated metadata is written on every run; the
# previous delete-and-retry path called to_zarr without consolidated=True.
hot_water_path = './data/hot_water.zarr'
hot_water.to_dataset().to_zarr(hot_water_path, mode='w', consolidated=True);


In [58]:
# Write the mask to its own Zarr store as the variable 'land_mask'.
# mode='w' replaces whatever is already at the path, so no manual delete-and-retry on
# ContainsGroupError is needed.
land_mask_path = './data/land_mask.zarr'
mask.to_dataset(name='land_mask').to_zarr(land_mask_path, mode='w');

In [59]:
# Open the hot_water Zarr store and display its 'analysed_sst' variable.
xr.open_zarr('./data/hot_water.zarr')['analysed_sst']

Unnamed: 0,Array,Chunk
Bytes,484.10 kiB,242.05 kiB
Shape,"(30, 51, 81)","(15, 51, 81)"
Count,3 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 484.10 kiB 242.05 kiB Shape (30, 51, 81) (15, 51, 81) Count 3 Tasks 2 Chunks Type float32 numpy.ndarray",81  51  30,

Unnamed: 0,Array,Chunk
Bytes,484.10 kiB,242.05 kiB
Shape,"(30, 51, 81)","(15, 51, 81)"
Count,3 Tasks,2 Chunks
Type,float32,numpy.ndarray


### Run the Tracker

In [17]:
%%time
# Block: TRACKER (no task activity locally, but yes activity on halo)
Tracker = ocetrac.Tracker(hot_water, mask, radius=2, min_size_quartile=0.75, timedim = 'time', xdim = 'x', ydim='y', positive=True)
# blobs = Tracker.track()

TRACKER TRACK TRACKETY TRACK
CPU times: user 351 µs, sys: 654 µs, total: 1 ms
Wall time: 881 µs


In [16]:
# Run the tracker built in the previous cell. The recorded output of this cell is
# "TypeError: Only 2-D and 3-D images supported."
blobs = Tracker.track()

TypeError: Only 2-D and 3-D images supported.