This notebook runs OceTrac on a subset of the MUR (Multi-scale Ultra-high Resolution) sea surface temperature (SST) data.

In [6]:
import shutil
import os

import fsspec
import xarray as xr
import numpy as np
# import matplotlib.pyplot as plt
# import dask
from dask.distributed import Client
# from zarr.errors import ContainsGroupError
# from xmhw import xmhw
import marineHeatWaves as mhw
import dask

# Set up Cluster

In [4]:
# Directory dask uses for temporary/spill files.
# Prefer the TEMP_DIR environment variable so the notebook is portable across machines;
# fall back to the original machine-specific path when it is unset or empty.
TEMP_DIR = os.environ.get('TEMP_DIR')
dask.config.set(temporary_directory=TEMP_DIR or '/data/pacific/rwegener/')

<dask.config.set at 0x7f4f185a0970>

In [7]:
# Create a dask.distributed client for the computations in this notebook.
# NOTE(review): dask.distributed documents memory_limit as a per-worker limit — confirm
# that 216GB per worker (rather than for the whole machine) is what is intended.
client = Client(memory_limit='216GB')
# The dashboard port is not fixed (see the "Hosting the HTTP server on port ..." message
# in the cell output), so print the actual link for this session.
print(client.dashboard_link)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 34736 instead


http://127.0.0.1:34736/status


# Load Data

## MUR

In [8]:
# Block: LOAD ZARR (no task activity)
# Location of the MUR SST Zarr store on S3.
file_location = 's3://mur-sst/zarr'

# Key/value mapper over the store; anon=True requests anonymous (credential-free) access.
ikey = fsspec.get_mapper(file_location, anon=True)

# Open the store with consolidated metadata; the variables come back as lazy,
# chunked arrays (see the repr in the next cell).
mur_full = xr.open_zarr(ikey, consolidated=True)
# Keep only the analysed SST variable for the rest of the notebook.
mur = mur_full['analysed_sst']

In [9]:
# Show the lazy array summary (total size, shape, chunking) without loading any data.
mur

Unnamed: 0,Array,Chunk
Bytes,15.19 TiB,245.78 MiB
Shape,"(6443, 17999, 36000)","(6443, 100, 100)"
Count,64801 Tasks,64800 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 15.19 TiB 245.78 MiB Shape (6443, 17999, 36000) (6443, 100, 100) Count 64801 Tasks 64800 Chunks Type float32 numpy.ndarray",36000  17999  6443,

Unnamed: 0,Array,Chunk
Bytes,15.19 TiB,245.78 MiB
Shape,"(6443, 17999, 36000)","(6443, 100, 100)"
Count,64801 Tasks,64800 Chunks
Type,float32,numpy.ndarray


# Subset

In [10]:
# Block: SUBSET
# Label-based crop to a small lat/lon box; the result spans 4 chunks, ~110 MB total.
mur_subset = mur.sel({'lat': slice(32, 32.5), 'lon': slice(121.4, 122.2)})

In [11]:
# Show the subset's lazy summary to confirm its size and chunk layout.
mur_subset

Unnamed: 0,Array,Chunk
Bytes,101.53 MiB,74.96 MiB
Shape,"(6443, 51, 81)","(6443, 50, 61)"
Count,64805 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 101.53 MiB 74.96 MiB Shape (6443, 51, 81) (6443, 50, 61) Count 64805 Tasks 4 Chunks Type float32 numpy.ndarray",81  51  6443,

Unnamed: 0,Array,Chunk
Bytes,101.53 MiB,74.96 MiB
Shape,"(6443, 51, 81)","(6443, 50, 61)"
Count,64805 Tasks,4 Chunks
Type,float32,numpy.ndarray


### Exploration

In [None]:
# Value range of the subset.
# Both reductions go through a single dask.compute call so the data are read once,
# and both results are shown (a cell only displays its last expression, so two
# separate .compute() lines would discard the max).
sst_max, sst_min = dask.compute(mur_subset.max(), mur_subset.min())
# Previously observed values: max 304.75, min 265.382
# NOTE(review): the observed minimum looks low for sea water if the units are kelvin —
# presumably a floor/placeholder value; confirm before using it in statistics.
float(sst_max), float(sst_min)

In [None]:
%%time
# Quick-look plot of the first time step of the subset (timed by the cell magic above).
mur_subset.isel(time=0).plot()

# Preprocess (xmhw)

In [12]:
from xmhw.xmhw import threshold, detect

In [None]:
# Status: ran for more than 2 minutes without finishing, so the run was abandoned.
# It may have completed in the past (with a smaller chunk and more patience).
# NOTE: the name `clim` is bound again later in this notebook by mhw.detect(...),
# which would replace whatever this call returns.
clim = threshold(mur_subset)

# Preprocess (`xmhw` manual)

In [22]:
# xmhw/calc_clim()
# Manual walk-through of the step named above (presumably mirroring xmhw's calc_clim —
# verify against the xmhw source).

# Centred 5-step rolling window along time. This only builds the rolling object
# (see the repr below); no reduction has been applied yet.
rolled = mur_subset.rolling(time=5, center=True)  # .construct('wdim')
rolled

DataArrayRolling [time->5(center)]

In [24]:
# Re-display the rolling object (same output as the previous cell).
rolled

DataArrayRolling [time->5(center)]

# Preprocess (EJO `marineHeatWaves`)

In [5]:
from datetime import datetime 

In [6]:
# Time coordinate of the subset; converted to ordinals in the next cell.
t = mur_subset.time

In [7]:
# Format time values
# Convert the datetime64 time coordinate to Python datetime objects through NumPy
# instead of round-tripping through strings: the earlier strptime format hard-coded a
# '.000000000' suffix and therefore raised ValueError for any timestamp with a
# non-zero fractional second. Casting to microsecond precision makes .tolist()
# return datetime.datetime objects; results are identical for whole-second timestamps.
mur_t_dt = t.values.astype('datetime64[us]').tolist()

# Day ordinals (datetime.toordinal) for each time step, used as the time axis
# passed to mhw.detect.
mur_t_dt_ordinal = np.array([time.toordinal() for time in mur_t_dt])

In [8]:
# Extract sst as a numpy array
# .values materialises the whole lazy subset in memory (about 101.53 MiB according to
# the subset repr above). Shape is (6443, 51, 81); presumably (time, lat, lon) — confirm.
sst_np = mur_subset.values

In [11]:
%%time
# Run the detection on a 10 x 10 corner of the grid, one pixel time series per call.
# np.ndindex visits the index pairs in the same order as two nested range(10) loops.
# mhws and clim are rebound on every pass, so only the last pixel (9, 9) is kept.
for x, y in np.ndindex(10, 10):
    mhws, clim = mhw.detect(mur_t_dt_ordinal, sst_np[:, x, y])

CPU times: user 11.5 s, sys: 0 ns, total: 11.5 s
Wall time: 11.5 s


In [12]:
# Inspect the climatology dict returned for the last pixel processed in the loop above.
clim

{'thresh': array([265.38198853, 265.38198853, 265.38198853, ..., 265.38198853,
        265.38198853, 265.38198853]),
 'seas': array([265.38203184, 265.38203283, 265.38203381, ..., 265.38203381,
        265.38203283, 265.38203184]),
 'missing': array([False, False, False, ..., False, False, False])}