# Zarr ECCO ds - cleaned version

In [1]:
from dask.distributed import Client

# Connect to an already-running Dask scheduler.
# NOTE(review): this address looks specific to one cluster session -- confirm
# it is still valid before re-running the notebook.
scheduler_address = "tcp://10.32.2.110:37943"
client = Client(scheduler_address)
client

0,1
Client  Scheduler: tcp://10.32.2.110:37943  Dashboard: /user/0000-0002-8176-9465/proxy/8787/status,Cluster  Workers: 30  Cores: 60  Memory: 345.00 GB


In [2]:
import xarray as xr
from matplotlib import pyplot as plt
import gcsfs
import dask
import dask.array as dsa
import numpy as np
import intake
import fsspec
import xmitgcm
from xmitgcm.llcreader.llcmodel import faces_dataset_to_latlon
%matplotlib inline



In [3]:
# Open the ECCO Zarr store through a requester-pays GCS filesystem, using the
# store's consolidated metadata.
ecco_fs = gcsfs.GCSFileSystem(requester_pays=True)
ecco_store = ecco_fs.get_mapper('pangeo-ecco-eccov4r3/eccov4r3')
ds = xr.open_zarr(ecco_store, consolidated=True)
ds

In [4]:
# Time span between consecutive snapshots, in seconds.
# `ds.time_snp[1:]` serves as a container of the right length; its values are
# replaced by the snapshot spacing below.
dt = ds.time_snp[1:].load()
# Dividing the timedelta64 differences by one second gives float seconds in a
# single vectorized step (no per-element Python loop).
dt.values = np.diff(ds.time_snp.values) / np.timedelta64(1, 's')
# rename time (and iter axis)
dt = dt.rename({'time_snp': 'time'})
# The time axis of dt should be the same as that of the monthly averages
# (without their first and last entries). Use assign_coords instead of writing
# to `dt.time.values`: recent xarray releases refuse in-place assignment to the
# values of a dimension coordinate.
dt = dt.assign_coords(time=ds.time[1:-1].values)

ds = ds.assign_coords(dt=dt)
ds.dt.attrs = {'units': 's','standard_name': 'dt','coordinate': 'time','long_name': 'time span between snapshots'}

# NOTE(review): open question from the original author -- should NaNs be added
# explicitly as the first and last value so dt spans the full `time` axis?
# Later cells slice `coords.dt[1:287]`, which suggests dt is already padded
# when it is attached to `ds` -- confirm.

In [5]:
# Pass the dataset through xmitgcm's `faces_dataset_to_latlon` helper.
# NOTE(review): judging by the helper's name this rearranges the face-based
# layout into a lat-lon style layout -- see the xmitgcm llcreader docs.
ds_ll = faces_dataset_to_latlon(ds)
ds_ll

In [6]:
# Drop the horizontal index coordinates `i`, `j`, `i_g`, `j_g`; the vertical
# ones (`k`, `k_u`, `k_l`, `k_p1`) are deliberately kept.
# NOTE(review): newer xarray releases deprecate `Dataset.drop` in favour of
# `Dataset.drop_vars` -- check the installed version before switching.
ds_ll_drop = ds_ll.drop(['i', 'j', 'i_g', 'j_g']) #, 'k', 'k_u', 'k_l', 'k_p1'])

In [7]:
# A trick to make things work a bit faster: keep the former coordinate
# variables in their own dataset (`coords`, as plain data variables) and strip
# them from the data (`ds`). Both are rolled by the same number of points along
# `i` / `i_g` so they stay consistent with each other.
x_shift = {'i': 100, 'i_g': 100}
coords = ds_ll_drop.coords.to_dataset().reset_coords().roll(**x_shift)
ds = ds_ll_drop.reset_coords(drop=True).roll(**x_shift)
ds

  
  This is separate from the ipykernel package so we can avoid doing imports until


In [8]:
import xgcm

# Map each xgcm axis to the dataset dimensions that sit at each grid position.
axis_positions = {
    'X': {'center': 'i', 'left': 'i_g'},
    'Y': {'center': 'j', 'left': 'j_g'},
    'T': {'center': 'time'},
    'Z': {'center': 'k', 'left': 'k_l', 'outer': 'k_p1', 'right': 'k_u'},
}
# Only the X axis is declared periodic.
grid = xgcm.Grid(ds, coords=axis_positions, periodic=['X'])
grid

<xgcm.Grid>
X Axis (periodic):
  * center   i --> left
  * left     i_g --> center
Y Axis (not periodic):
  * center   j --> left
  * left     j_g --> center
T Axis (not periodic):
  * center   time
Z Axis (not periodic):
  * center   k --> left
  * left     k_l --> center
  * outer    k_p1 --> center
  * right    k_u --> center

In [9]:
def tracer_flux_budget(suffix):
    """Build the flux-convergence terms for one tracer.

    `suffix` selects the tracer and is `TH` or `SLT`. The flux variables are
    read from the notebook-level `ds` and differenced with the notebook-level
    `grid`. Returns a new xarray.Dataset holding four variables, each named
    with the tracer suffix: horizontal advective, horizontal diffusive,
    vertical advective and vertical diffusive convergence. The terms are not
    summed here.
    """
    def flux(prefix):
        # Flux variable for this tracer, e.g. 'ADVx_' + 'TH'.
        return ds[prefix + suffix]

    def vertical_diff(prefix):
        return grid.diff(flux(prefix), 'Z', boundary='fill')

    def labelled(term, label):
        return term.rename(label + suffix)

    horiz_adv = -(grid.diff(flux('ADVx_'), 'X') +
                  grid.diff(flux('ADVy_'), 'Y', boundary='fill'))
    horiz_diff = -(grid.diff(flux('DFxE_'), 'X') +
                   grid.diff(flux('DFyE_'), 'Y', boundary='fill'))
    # sign convention is opposite for vertical fluxes, so no leading minus
    vert_adv = vertical_diff('ADVr_')
    vert_diff = vertical_diff('DFrE_') + vertical_diff('DFrI_')

    return xr.merge([
        labelled(horiz_adv, 'conv_horiz_adv_flux_'),
        labelled(horiz_diff, 'conv_horiz_diff_flux_'),
        labelled(vert_adv, 'conv_vert_adv_flux_'),
        labelled(vert_diff, 'conv_vert_diff_flux_'),
    ])

In [10]:
# Advective/diffusive flux-convergence terms for salt. `tracer_flux_budget`
# returns them as separate variables; they are not summed.
budget_slt = tracer_flux_budget('SLT')
budget_slt

In [11]:
# Advective/diffusive flux-convergence terms for heat (tracer suffix 'TH').
budget_th = tracer_flux_budget('TH')
budget_th

**Theta Tendency Term**

In [12]:
# Calculating the total tendency term for heat: change between consecutive
# snapshots of THETA_snp scaled by (1 + ETAN_snp / Depth).
theta_scaled = (ds.THETA_snp * (1 + (ds.ETAN_snp / coords.Depth))).diff('time_snp')

# Force theta_scaled onto the same timestamps as dt by wrapping its data in a
# new DataArray. `[1:-1]` drops the first and last entries of `coords.dt`
# without hard-coding the length of the time axis.
theta_scaled = xr.DataArray(theta_scaled.data, dims=['time','k','j','i'],
                            coords={'time': coords.dt[1:-1].time.data})
theta_scaled

In [13]:
# Tendency = snapshot-to-snapshot change divided by the time between snapshots.
# `[1:-1]` drops the first and last entries of `coords.dt` without hard-coding
# the length of the time axis.
theta_tend = theta_scaled / coords.dt[1:-1]

**Salt Tendency Term**

In [14]:
# Calculating the total tendency term for salt: change between consecutive
# snapshots of SALT_snp scaled by (1 + ETAN_snp / Depth).
salt_scaled = (ds.SALT_snp * (1 + (ds.ETAN_snp / coords.Depth))).diff('time_snp')

# Force salt_scaled onto the same timestamps as dt by wrapping its data in a
# new DataArray. `[1:-1]` drops the first and last entries of `coords.dt`
# without hard-coding the length of the time axis.
salt_scaled = xr.DataArray(salt_scaled.data, dims=['time','k','j','i'],
                           coords={'time': coords.dt[1:-1].time.data})
salt_scaled
#units: g/kg (psu)

In [15]:
# Tendency = snapshot-to-snapshot change divided by the time between snapshots.
# `[1:-1]` drops the first and last entries of `coords.dt` without hard-coding
# the length of the time axis.
salt_tend = salt_scaled / coords.dt[1:-1]
#units: g/kg*s (psu/s)

In [16]:
# Add the calculated total tendency to each budget dataset.

# Product of the grid variables drF, rA and hFacC.
# NOTE(review): presumably layer thickness * cell area * wet fraction, i.e. the
# cell volume -- confirm against the model grid documentation.
volume = (coords.drF * coords.rA * coords.hFacC)

# Tendency times volume, stored under the '*_truth' names.
for budget, var_name, tendency in (
    (budget_th, 'total_tendency_TH_truth', theta_tend),
    (budget_slt, 'total_tendency_SLT_truth', salt_tend),
):
    budget[var_name] = tendency * volume

In [17]:
# Salt plume term: oceSPtnd times cell area rA, divided by the reference
# density runit2mass.
# NOTE(review): `runit2mass` is defined again with the same value in the
# surface-flux cell.
runit2mass = 1.035e3 #kg/m^3
salt_plume = ((ds.oceSPtnd * coords.rA)/runit2mass)#.chunk({'i':-1, 'j':-1, 'k':-1, 'time':1})
salt_plume

### Surface Fluxes

The surface fluxes are only active in the top model layer. We need to specify some constants to convert them to the proper units, and scale factors to convert them to integral form. Because they are 2D fields, they must also be expanded along the vertical dimension (using xarray alignment) before they can be merged with the 3D variables.

In [18]:
# constants
heat_capacity_cp = 3.994e3  # NOTE(review): presumably J/(kg K) -- confirm
runit2mass = 1.035e3 #rho
# Denominator shared by both heat-flux terms below.
cp_times_rho = heat_capacity_cp * runit2mass


def _surface_to_3d(surface_da):
    """Place a single-level field on the full `k` axis of hFacC.

    The field must already carry a scalar `k` coordinate. It is expanded to a
    length-one `k` dimension, outer-aligned with `coords.hFacC`, and every
    level the alignment had to add is filled with zero.
    """
    _, on_all_levels = xr.align(coords.hFacC, surface_da.expand_dims('k'), join='outer')
    return on_all_levels.fillna(0.)


# Treat the shortwave flux separately from the rest of the surface flux.

# Surface heat flux without the shortwave part; `hFacC.isel(k=0)` also supplies
# the scalar `k` coordinate that `_surface_to_3d` relies on.
surf_flux_th = (ds.TFLUX - ds.oceQsw) * coords.rA * coords.hFacC.isel(k=0) / cp_times_rho
surf_flux_th_aligned = _surface_to_3d(surf_flux_th)

# Shortwave part; its vertical distribution is handled in a later cell.
surf_flux_th_sw = ds.oceQsw * coords.rA * coords.hFacC.isel(k=0) / cp_times_rho

# TODO(author): what about the `ds.oceFWflx` term for the salt budget?

# Salt flux: no hFacC factor here, so the `k` coordinate is attached by hand.
surf_flux_slt = ds.SFLUX * coords.rA  / runit2mass
surf_flux_slt.coords['k'] = coords.k[0]
surf_flux_slt_aligned = _surface_to_3d(surf_flux_slt)
# NOTE(review): the original comment gave the units as (degC m^3)/s, which
# looks copied from the heat term; for salt this is presumably
# (g/kg m^3)/s -- confirm.

In [19]:
# Attach the salt plume term and the k-aligned surface salt flux to the salt
# budget dataset.
budget_slt['salt_plume'] = salt_plume
budget_slt['surface_flux_SLT'] = surf_flux_slt_aligned

### Shortwave Flux

Special treatment is needed for the shortwave flux, which penetrates into the interior of the water column.

In [20]:
def swfrac(coords, fact=1., jwtype=2):
    """Clone of the MITgcm routine for computing shortwave flux penetration.

    Evaluates the double-exponential profile
    ``rfac * exp(z / a1) + (1 - rfac) * exp(z / a2)`` where ``z`` is
    ``fact * coords.Zl`` restricted to ``k_l`` labels 0 through 18.

    Parameters
    ----------
    coords : object exposing a ``Zl`` variable indexed by ``k_l``
        Source of the depth levels.
        NOTE(review): assumes Zl is negative below the surface so the
        exponentials decay with depth -- confirm.
    fact : float, optional
        Factor applied to the depths before evaluating the profile.
    jwtype : int, optional
        1-based index (1 to 5) selecting one coefficient set from the tables
        below.

    Returns
    -------
    The profile on the selected levels, renamed to ``'swdk'``.

    Raises
    ------
    ValueError
        If `jwtype` is outside 1..5. Without this check, 0 or a negative value
        would silently wrap around to another coefficient set through negative
        indexing.
    """
    rfac = (0.58, 0.62, 0.67, 0.77, 0.78)
    a1 = (0.35, 0.6, 1.0, 1.5, 1.4)
    a2 = (23.0, 20.0, 17.0, 14.0, 7.9)

    if not 1 <= jwtype <= len(rfac):
        raise ValueError(
            'jwtype must be between 1 and %d, got %r' % (len(rfac), jwtype))

    facz = fact * coords.Zl.sel(k_l=slice(0, 18))
    j = jwtype - 1
    swdk = (rfac[j] * np.exp(facz / a1[j]) +
            (1 - rfac[j]) * np.exp(facz / a2[j]))

    return swdk.rename('swdk')

# Shortwave flux at each level: the shortwave surface flux times the profile
# from `swfrac`. Left-align it onto the Zl levels and set the levels the
# profile does not cover to zero.
sw_flux_profile = surf_flux_th_sw * swfrac(coords)
swdown = xr.align(coords.Zl, sw_flux_profile, join='left')[1].fillna(0)

In [21]:
# Convergence of swdown: negated vertical difference along the Z axis, with
# 'fill' boundary handling at the ends of the axis.
sw = -grid.diff(swdown, 'Z', boundary='fill')
sw

In [22]:
# Attach the k-aligned surface heat flux (shortwave excluded) to the heat budget.
budget_th['surface_flux_conv_TH'] = surf_flux_th_aligned#.chunk({'i':-1, 'j':-1, 'k':-1, 'time':1})

# Attach the shortwave flux convergence to the heat budget.
budget_th['sw_flux_conv_TH'] = sw#.chunk({'i':-1, 'j':-1, 'k':-1, 'time':1})

In [23]:
# Add THETA to the heat budget, then rechunk so that each chunk spans all of
# i, j and k for a single time step (-1 means "one chunk for the whole
# dimension").
th_chunks = {'i': -1, 'j': -1, 'k': -1, 'time': 1}
budget_th = budget_th.assign(theta=ds.THETA).chunk(th_chunks)
budget_th

In [24]:
# Add SALT and the SALT snapshots to the salt budget, then rechunk so that each
# chunk spans all of i, j and k for a single time step (-1 means "one chunk for
# the whole dimension").
slt_chunks = {'i': -1, 'j': -1, 'k': -1, 'time': 1}
budget_slt = budget_slt.assign(salt=ds.SALT, salt_snp=ds.SALT_snp).chunk(slt_chunks)
budget_slt

## Saving Data

In [25]:
# Authenticate interactively in the browser, then build a second filesystem
# object from the resulting full-control token for writing.
# NOTE(review): the interactive flow prints an authorization prompt into the
# cell output -- clear that output before sharing the notebook.
gcp_project = 'pangeo-181919'
gcfs_auth = gcsfs.GCSFileSystem(project=gcp_project, token='browser')
token = gcfs_auth.tokens[(gcp_project, 'full_control')]
gcfs_w_token = gcsfs.GCSFileSystem(project=gcp_project, token=token)

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=586241054156-9kst7ltfj66svc342pcn43vp6ta3idin.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdevstorage.full_control&state=T90LOneWK2YAogOgC2yTY9P36aberH&prompt=consent&access_type=offline


Enter the authorization code:  [REDACTED]


In [None]:
# Save the heat budget dataset to GCS as Zarr.
# mode='w' overwrites any existing store at this path.
gcsmap_th = gcfs_w_token.get_mapper('pangeo-tmp/stb2145/ECCO/budget_th.zarr')
budget_th.to_zarr(gcsmap_th, mode='w')

In [None]:
# Save the salt budget dataset to GCS as Zarr.
# mode='w' overwrites any existing store at this path.
gcsmap_slt = gcfs_w_token.get_mapper('pangeo-tmp/stb2145/ECCO/budget_slt.zarr')
budget_slt.to_zarr(gcsmap_slt, mode='w')