# Relabel and merge yearly ocetrac output

In [None]:
import dask
import xarray as xr
import numpy as np
import pandas as pd
import dask.array as da
import warnings
warnings.filterwarnings('ignore')
from datetime import date
import matplotlib.pyplot as plt
print('loaded libraries')

In [None]:
# Root directories of the input datasets.
hfdrake_path = "/pub/hfdrake/datasets/CM4_MHW_blobs/"
mt_path = "/pub/mariant3/WarmWaterMasses/data/"

# Daily output, chunked one time step at a time.
ds = xr.open_mfdataset(
    f"{hfdrake_path}/data_daily/*.ocean_daily.*.nc",
    chunks={"time": 1},
)
# realign cell center/corner coordinates
ds = ds.isel(
    yh=slice(1, None),
    yq=slice(None, -1),
    xh=slice(1, None),
    xq=slice(None, -1),
)

# Extent of ds, read from its corner (xq/yq) coordinates; used further down
# as the xh/yh selection bounds of the region.
xh_min, xh_max = ds.xq.values.min(), ds.xq.values.max()
yh_min, yh_max = ds.yq.values.min(), ds.yq.values.max()
print(xh_min, xh_max, yh_min, yh_max, sep="\n")

snap = xr.open_mfdataset(
    f"{hfdrake_path}/data_daily/*.ocean_daily_snap*.nc",
    chunks={"time": 1},
)
# Rename the snapshot time coordinate to time_bounds and suffix every data
# variable with "_bounds" so they can later be merged with ds
snap = snap.rename(
    {'time': 'time_bounds', **{v: f"{v}_bounds" for v in snap.data_vars}}
)

static = xr.open_dataset(
    "/pub/hfdrake/datasets/CM4_MHW_blobs/data/WMT_monthly/ocean_month_rho2.static.nc"
)

# tos record, its static file, and the yearly ocetrac blob files.
tos = xr.open_dataset(
    f"{hfdrake_path}/data/ocean_daily_cmip.01860101-01901231.tos.nc",
    chunks={'time': 1},
)
tos_static = xr.open_dataset(f"{hfdrake_path}/data/ocean_daily_cmip.static.nc")
blobs = xr.open_mfdataset(f"{mt_path}/ocetracv6/ocetrac-v6-blobs-tos-t1*.nc")

Relabel events

In [None]:
# Outputs relabeled object for the globe.
# Each year's labels are shifted by the largest (already shifted) label of the
# preceding year, so labels become unique across the whole record.
# NOTE(review): assumes unlabelled cells are NaN rather than 0; otherwise the
# shift would turn background into labels -- confirm against the ocetrac output.
years = np.unique(blobs.time.dt.year.values)
step_year = blobs.time.dt.year

# Running shift applied to the current year; 0 leaves labels (and dtype) as is.
offset = 0
for prev_year, year in zip(years[:-1], years[1:]):
    print(f"Working on year {year}")
    # Pair each year with the previous year actually present in the record
    # instead of assuming `year - 1` exists.
    prev_max = blobs.labels.isel(time=(step_year == prev_year)).max().values
    # A year without any labelled cell has a NaN maximum; adding it would turn
    # every later label into NaN, so keep the previous shift in that case.
    if np.isfinite(prev_max):
        offset = prev_max
    # DataArray.where (unlike xr.where with default options) keeps the attrs
    # and dimension order of `labels`; values outside `year` are left untouched.
    blobs["labels"] = blobs.labels.where(step_year != year, blobs.labels + offset)

In [None]:
# Storing the individual id values in the MANSO region
ids = np.unique(blobs.labels.sel(
    xh=slice(xh_min, xh_max),
    yh=slice(yh_min, yh_max),
))

# Drop NaN entries with a single vectorized mask instead of a per-element
# Python loop (which also shadowed the builtin `id`).
ids = ids[~np.isnan(ids)]

In [None]:
# First and last time stamps of the blobs record, as strings; the time axis is
# converted to strings once and both ends are picked from that array.
startday, endday = blobs.time.values.astype(dtype='str')[[0, -1]]

In [None]:
# merging tos and the regional blobs
# Blobs restricted to the xh/yh bounds of the region.
reg_blobs = blobs.sel(xh=slice(xh_min, xh_max), yh=slice(yh_min, yh_max))

# tos restricted to the blobs' time span and to the same xh/yh bounds.
ds_region = tos.sel(
    time=slice(startday, endday),
    xh=slice(xh_min, xh_max),
    yh=slice(yh_min, yh_max),
)

# Inner join: only coordinate values present in both inputs are kept.
ds_region_blobs = xr.merge([ds_region, reg_blobs], join='inner')

In [None]:
# Defining the land and ocean mask
hot_water = ds_region_blobs['tos']

# Shape of one time slice of tos and the first time step itself, which is
# where finite (ocean) and NaN (land) cells are read from.
_grid_shape = ds_region_blobs['tos'].shape[1:]
_first_step = ds_region_blobs['tos'].isel(time=0)

# 1 where the first time step is finite, 0 elsewhere.
mask_ocean = 1 * np.ones(_grid_shape) * np.isfinite(_first_step)
# Multiplied by 0, so this is 0 in every cell.
mask_land = 0 * np.ones(_grid_shape) * np.isnan(_first_step)
# Combined mask: 1 over ocean, 0 over land.
lomask = mask_ocean + mask_land

In [None]:
# xarray Dataset to save
# Take the labels from the merged dataset rather than from the pre-merge
# `reg_blobs`: the merge is an inner join, so only the merged labels are
# guaranteed to match the xh/yh/time coordinates used below. The transpose
# pins the dimension order to the one declared for the variable.
region_labels = ds_region_blobs['labels'].transpose('time', 'yh', 'xh')

ds_out = xr.Dataset(
    data_vars=dict(
        blobs=(['time', 'yh', 'xh'], region_labels.values, region_labels.attrs),
        lomask=(['yh', 'xh'], lomask.values),
    ),
    coords=dict(
        xh=ds_region_blobs['tos'].xh,
        yh=ds_region_blobs['tos'].yh,
        time=ds_region_blobs['tos'].time,
    ),
    # NOTE(review): the description says "for the globe" although the data
    # written here are the regional subset -- confirm the intended wording.
    attrs=dict(
        description="Relabeled ocetrac-v6-run for the globe",
        data="tos from CM4.0",  # full-period climatology
        task="radius=0.0, min_size_quartile=0.0",
        threshold='29 deg C',
    ),
)
ds_out

In [None]:
## for nomenclature
# Datetime accessors of the first (d) and last (e) time step of blobs.
d, e = (blobs.isel(time=pos).time.dt for pos in (0, -1))
# Zero-padded YYYYMMDD strings for the start and end of the record.
date_d, date_e = (
    f"{stamp.year.values:0004}{stamp.month.values:02}{stamp.day.values:02}"
    for stamp in (d, e)
)

In [None]:
# ds_out.to_netcdf(f"/pub/mariant3/WarmWaterMasses/data/ocetracv9/ocetrac-v9-blobs-tos-t1-r1-msq0-{date_d}-{date_e}-region.nc")