In [None]:
import xarray as xr
import numpy as np
import pandas as pd

# heat fluxes ds

## gridding funcs

In [3]:
# 3d gridding funcs
def grid_lat_3d(dsgpd_ln,gs=1):
    lat_min = -80
    lat_max = -30
    lat = np.arange(lat_min,lat_max+gs,gs)
    lat_labels = range(0,lat_max-lat_min,gs)
    
    return dsgpd_ln.groupby_bins('lat',lat,
                       labels=lat_labels,
                       restore_coord_dims=True).median(skipna=True,dim='time')
    
def grid_lon_3d(dsgpd_t,gs=1):
    # define lon min and max resp
    lon_min = -180
    lon_max = 180
    lon = np.arange(lon_min,lon_max+gs,gs)
    lon_labels = range(0,lon_max-lon_min,gs)

    return dsgpd_t.groupby_bins('lon',lon,
                       labels=lon_labels,
                       restore_coord_dims=True).apply(grid_lat_3d)

def grid_month_3d(dsgpd_yr,gs=1):
    """grid dataset onto a monthly climatology"""
    return dsgpd_yr.groupby('time.month').apply(grid_lon_3d)

def grid_var_3d(dsvar,gs=1):
    """grid 1x1° onto year-month index time series"""
    return dsvar.groupby('time.year').apply(grid_month_3d)

# 4d gridding funcs

def grid_lat_4d(dsgpd_ln,gs=1):
    lat_min = -80
    lat_max = -30
    lat = np.arange(lat_min,lat_max+gs,gs)
    lat_labels = range(0,lat_max-lat_min,gs)
    
    return dsgpd_ln.groupby_bins('lat',lat,
                       labels=lat_labels,
                       restore_coord_dims=True).median(skipna=True,dim='n_prof')
    
def grid_lon_4d(dsgpd_t,gs=1):
    # define lon min and max resp
    lon_min = -180
    lon_max = 180
    lon = np.arange(lon_min,lon_max+gs,gs)
    lon_labels = range(0,lon_max-lon_min,gs)

    return dsgpd_t.groupby_bins('lon',lon,
                       labels=lon_labels,
                       restore_coord_dims=True).apply(grid_lat_4d)

def grid_month_4d(dsgpd_yr,gs=1):
    return dsgpd_yr.groupby('time.month').apply(grid_lon_4d)

def grid_var_4d(dsvar,gs=1):
    return dsvar.groupby('time.year').apply(grid_month_4d)

## grid datasets

In [68]:
hf = xr.open_mfdataset('/home/theospira/notebooks/data/ERA5_heat_flux/6_hourly/**.nc').rename({'longitude':'lon','latitude':'lat'})
hf = hf.sel(lat=slice(-30,-79))
hf

Unnamed: 0,Array,Chunk
Bytes,27.83 GiB,793.24 MiB
Shape,"(26300, 197, 1442)","(1464, 197, 721)"
Count,180 Tasks,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 27.83 GiB 793.24 MiB Shape (26300, 197, 1442) (1464, 197, 721) Count 180 Tasks 36 Chunks Type float32 numpy.ndarray",1442  197  26300,

Unnamed: 0,Array,Chunk
Bytes,27.83 GiB,793.24 MiB
Shape,"(26300, 197, 1442)","(1464, 197, 721)"
Count,180 Tasks,36 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,27.83 GiB,793.24 MiB
Shape,"(26300, 197, 1442)","(1464, 197, 721)"
Count,180 Tasks,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 27.83 GiB 793.24 MiB Shape (26300, 197, 1442) (1464, 197, 721) Count 180 Tasks 36 Chunks Type float32 numpy.ndarray",1442  197  26300,

Unnamed: 0,Array,Chunk
Bytes,27.83 GiB,793.24 MiB
Shape,"(26300, 197, 1442)","(1464, 197, 721)"
Count,180 Tasks,36 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,27.83 GiB,793.24 MiB
Shape,"(26300, 197, 1442)","(1464, 197, 721)"
Count,180 Tasks,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 27.83 GiB 793.24 MiB Shape (26300, 197, 1442) (1464, 197, 721) Count 180 Tasks 36 Chunks Type float32 numpy.ndarray",1442  197  26300,

Unnamed: 0,Array,Chunk
Bytes,27.83 GiB,793.24 MiB
Shape,"(26300, 197, 1442)","(1464, 197, 721)"
Count,180 Tasks,36 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,27.83 GiB,793.24 MiB
Shape,"(26300, 197, 1442)","(1464, 197, 721)"
Count,180 Tasks,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 27.83 GiB 793.24 MiB Shape (26300, 197, 1442) (1464, 197, 721) Count 180 Tasks 36 Chunks Type float32 numpy.ndarray",1442  197  26300,

Unnamed: 0,Array,Chunk
Bytes,27.83 GiB,793.24 MiB
Shape,"(26300, 197, 1442)","(1464, 197, 721)"
Count,180 Tasks,36 Chunks
Type,float32,numpy.ndarray


In [79]:
def coarsen_ds(ds,gs=1,lat2b=50,lon2b=360):
    """
    coarsen a gridded dataset to a 1x1° lat-lon gridspace.
    ------
    params:
    ds: xarray.Dataset, 
        desired dataset to coarsen
    gs: grid size
        default 1
    lat2b: size of latitude grid to be [-79,-30]
           default 30
    lon2b: size of longitude grid to be [-180,180]
           default 360
    
    """
    ds = (ds.coarsen(lat=int(ds.lat.size*gs/lat2b),boundary='trim').mean(
            ).coarsen(lon=int(ds.lon.size*gs/lon2b),boundary='trim').mean())
    return ds

In [70]:
hf.lat.size/33

5.96969696969697

In [81]:
#hf = 
coarsen_ds(hf)

Unnamed: 0,Array,Chunk
Bytes,2.29 GiB,65.34 MiB
Shape,"(26300, 65, 360)","(1464, 65, 180)"
Count,540 Tasks,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.29 GiB 65.34 MiB Shape (26300, 65, 360) (1464, 65, 180) Count 540 Tasks 36 Chunks Type float32 numpy.ndarray",360  65  26300,

Unnamed: 0,Array,Chunk
Bytes,2.29 GiB,65.34 MiB
Shape,"(26300, 65, 360)","(1464, 65, 180)"
Count,540 Tasks,36 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.29 GiB,65.34 MiB
Shape,"(26300, 65, 360)","(1464, 65, 180)"
Count,540 Tasks,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.29 GiB 65.34 MiB Shape (26300, 65, 360) (1464, 65, 180) Count 540 Tasks 36 Chunks Type float32 numpy.ndarray",360  65  26300,

Unnamed: 0,Array,Chunk
Bytes,2.29 GiB,65.34 MiB
Shape,"(26300, 65, 360)","(1464, 65, 180)"
Count,540 Tasks,36 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.29 GiB,65.34 MiB
Shape,"(26300, 65, 360)","(1464, 65, 180)"
Count,540 Tasks,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.29 GiB 65.34 MiB Shape (26300, 65, 360) (1464, 65, 180) Count 540 Tasks 36 Chunks Type float32 numpy.ndarray",360  65  26300,

Unnamed: 0,Array,Chunk
Bytes,2.29 GiB,65.34 MiB
Shape,"(26300, 65, 360)","(1464, 65, 180)"
Count,540 Tasks,36 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.29 GiB,65.34 MiB
Shape,"(26300, 65, 360)","(1464, 65, 180)"
Count,540 Tasks,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 2.29 GiB 65.34 MiB Shape (26300, 65, 360) (1464, 65, 180) Count 540 Tasks 36 Chunks Type float32 numpy.ndarray",360  65  26300,

Unnamed: 0,Array,Chunk
Bytes,2.29 GiB,65.34 MiB
Shape,"(26300, 65, 360)","(1464, 65, 180)"
Count,540 Tasks,36 Chunks
Type,float32,numpy.ndarray


In [100]:
idx = np.unique(hf.isel(lon=720).slhf == hf.isel(lon=721).slhf, return_index=True)

In [107]:
idx

(array([False,  True]), array([   0, 1623]))

In [106]:
hf.isel(lon=721).slhf

Unnamed: 0,Array,Chunk
Bytes,19.76 MiB,1.10 MiB
Shape,"(26300, 197)","(1464, 197)"
Count,198 Tasks,18 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 19.76 MiB 1.10 MiB Shape (26300, 197) (1464, 197) Count 198 Tasks 18 Chunks Type float32 numpy.ndarray",197  26300,

Unnamed: 0,Array,Chunk
Bytes,19.76 MiB,1.10 MiB
Shape,"(26300, 197)","(1464, 197)"
Count,198 Tasks,18 Chunks
Type,float32,numpy.ndarray


In [104]:
hf.isel(lon=720).slhf[0].load()

In [105]:
hf.isel(lon=721).slhf[0].load()

In [96]:
hf.isel(lon=slice(720,722))

Unnamed: 0,Array,Chunk
Bytes,39.53 MiB,1.10 MiB
Shape,"(26300, 197, 2)","(1464, 197, 1)"
Count,216 Tasks,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 39.53 MiB 1.10 MiB Shape (26300, 197, 2) (1464, 197, 1) Count 216 Tasks 36 Chunks Type float32 numpy.ndarray",2  197  26300,

Unnamed: 0,Array,Chunk
Bytes,39.53 MiB,1.10 MiB
Shape,"(26300, 197, 2)","(1464, 197, 1)"
Count,216 Tasks,36 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,39.53 MiB,1.10 MiB
Shape,"(26300, 197, 2)","(1464, 197, 1)"
Count,216 Tasks,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 39.53 MiB 1.10 MiB Shape (26300, 197, 2) (1464, 197, 1) Count 216 Tasks 36 Chunks Type float32 numpy.ndarray",2  197  26300,

Unnamed: 0,Array,Chunk
Bytes,39.53 MiB,1.10 MiB
Shape,"(26300, 197, 2)","(1464, 197, 1)"
Count,216 Tasks,36 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,39.53 MiB,1.10 MiB
Shape,"(26300, 197, 2)","(1464, 197, 1)"
Count,216 Tasks,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 39.53 MiB 1.10 MiB Shape (26300, 197, 2) (1464, 197, 1) Count 216 Tasks 36 Chunks Type float32 numpy.ndarray",2  197  26300,

Unnamed: 0,Array,Chunk
Bytes,39.53 MiB,1.10 MiB
Shape,"(26300, 197, 2)","(1464, 197, 1)"
Count,216 Tasks,36 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,39.53 MiB,1.10 MiB
Shape,"(26300, 197, 2)","(1464, 197, 1)"
Count,216 Tasks,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 39.53 MiB 1.10 MiB Shape (26300, 197, 2) (1464, 197, 1) Count 216 Tasks 36 Chunks Type float32 numpy.ndarray",2  197  26300,

Unnamed: 0,Array,Chunk
Bytes,39.53 MiB,1.10 MiB
Shape,"(26300, 197, 2)","(1464, 197, 1)"
Count,216 Tasks,36 Chunks
Type,float32,numpy.ndarray


In [88]:
np.where(hf.lon.diff('lon')==0)

(array([720]),)

In [83]:
np.unique(hf.lon.diff('lon'))

array([0.  , 0.25], dtype=float32)

In [80]:
np.unique(coarsen_ds(hf).lon.diff('lon'))

array([0.8125, 0.9375, 1.    ], dtype=float32)

In [6]:
x    = 1 
time = np.asarray(pd.date_range("2004-01-01","2021-12-31",freq="1M"))
p    = xr.open_dataset('/home/theospira/notebooks/data/SO_2004-2021_2dbar_interp_w_mld.nc').pres.data
lat  = np.arange(-79,-30+x,x)
lon  = np.arange(-180,180,x)

ds_grid = xr.Dataset(
            data_vars = dict(
                            slhf = (['time','lon','lat'], dvars_4d[0]),
                            sshf = (['time','lon','lat'], dvars_4d[1]),
                            ssr  = (['time','lon','lat'], dvars_4d[2]),
                            str  = (['time','lon','lat'], dvars_4d[3]),
                            ),
            coords   = dict(
                            time    = (['time'], time),
                            lon     = (['lon'], lon),
                            lat     = (['lat'], lat),
                            ),
            attrs    = dict(description=str(
    '1 degree grid of median along 2dbar pressure interpolated monthly time series data from Argo, MEOP, SOCCOM, CTDs, and Gliders.'))
            )

# add mask
ds_grid['mask'] = xr.open_dataset('/home/theospira/notebooks/2m_interp/interp/3_part_mask.nc').mask
ds_grid["mask"].attrs["legend"] = "0 = Subantarctic Zone (SAZ). \n 1 = ACC Zone (ACCZ). \n 2 = Sub-ACC Zone (SACCZ)."

In [11]:
hfy = hf.groupby('time.year')

hfm = []
for i in tqdm(list(hfy.groups.keys())):
    hfm += hfy[i].groupby('time.month')[i],

In [None]:
# load ssh
ssh = xr.open_dataset('/home/theospira/notebooks/data/Copernicus/ssh_monthly_climatology_2004-2021.nc')
# load sea ice
si = xr.open_dataset('/home/theospira/notebooks/data/meereisportal_sea_ice/sic_monthly_time_series.nc.nc')
