## Restructure data again to correctly bin transports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
#Packages 
import numpy as np
import xarray as xr
import cmocean.cm as cmo
import matplotlib.pyplot as plt
import mpas_tools

from geometric_features import GeometricFeatures
from geometric_features import read_feature_collection

from mpas_tools.mesh.mask import compute_mpas_region_masks
from mpas_tools.parallel import create_pool
from mpas_tools.io import write_netcdf

import geojson
import json
import mosaic
import os.path
import mpasregions.sections as mps
import gsw
import warnings 
import glob
from xgcm import Grid

In [3]:
# global mesh
mesh_path = '/global/cfs/projectdirs/e3sm/inputdata/ocn/mpas-o/EC30to60E2r2/ocean.EC30to60E2r2.210210.nc'
mesh = xr.open_dataset(mesh_path)
mesh = mesh.assign_coords({
"VertexID": xr.DataArray(mesh.indexToVertexID, dims=('nVertices',)),
})

# global data
bmm_filepath = '/pscratch/sd/b/bmoorema/run_001_062/'
dso = xr.open_dataset(bmm_filepath + '20210421_sim7_CORE_60to30E2r2.mpaso.hist.am.timeSeriesStatsMonthly.0063-12-01.nc')

# open base mask that we will fill with 0s according to the original mask (this new mask will contain cells PLUS their corresponding edges and vertices)
# base_mask = xr.open_dataset('./mpas-o_EC30to60_global_base_mask.nc')


  dso = xr.open_dataset(bmm_filepath + '20210421_sim7_CORE_60to30E2r2.mpaso.hist.am.timeSeriesStatsMonthly.0063-12-01.nc')


In [4]:
dso.xtime_startMonthly

months = ['01','02','03','04','05','06','07','08','09','10','11','12']
ds_list = []

prefix = '20210421_sim7_CORE_60to30E2r2.mpaso.hist.am.timeSeriesStats'

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for i in range(0,12):
        month = months[i]
        file = prefix + f'Monthly.0063-{month}-01.nc'
        file_glob = glob.glob(file)
        ds = xr.open_dataset(bmm_filepath + file)[['xtime_startMonthly','timeMonthly_avg_normalVelocity','timeMonthly_avg_potentialDensity','timeMonthly_avg_layerThickness']]
        ds_xtime_startMonthly = ds.swap_dims({'Time':'xtime_startMonthly'})
        ds_list.append(ds_xtime_startMonthly)

dss = xr.concat(ds_list,dim='xtime_startMonthly')

```python
def extract_variables(ds):
    return ds[var_list]
xr.open_mfdataset(file = prefix + f'Monthly.0063-*-01.nc', preprocess=extract_variables)
```

In [5]:
# set up descriptor for plotting 
# # set up mesh to apply polypcolor
mesh.attrs['is_periodic'] = 'YES'
mesh.attrs['x_period'] = 360
mesh.attrs['y_period'] = 0.0

descriptor = mosaic.Descriptor(mesh,use_latlon=False)

### get edge and vertex indices for a transect created from an algorithm

In [6]:
LS_lats= np.array([54, 60, 66, 64, 58])   
LS_lons= np.array([302, 315, 310, 295, 296])

LS_lats = np.append(LS_lats, LS_lats[0])
LS_lons = np.append(LS_lons, LS_lons[0])

# # calculate transects from algorithm, sort vertices & edges to be in consecutive order
test_edges, test_verts = mps.calculate_transects_multiple_pts(LS_lons, LS_lats, mesh)

# from the transect, create a mask to capture the entire region specified by the transects
# this will also output lats and lons corresponding to test_verts
path = './'
filename = 'LS_test'
geojson_file_name = 'Labrador Sea from transect algorithm'
tags = "Labrador_Sea;Davis_Strait"
author = "Kaila Uyeda"

    
test_verts_lats, test_verts_lons, dsMasks = mps.transect_from_alg_create_nc(test_verts, 
                                                                            mesh, 
                                                                            path,
                                                                            filename, 
                                                                            geojson_file_name,
                                                                            tags, 
                                                                            author)

# use the dsMasks file to get the ACTUAL working vertices that you will need
# this eliminates duplicate vertices that would occur if the transect moves back on itself
# check that the vertices and edges from the mask are actually at the same plaaces...
# in other words, we have the correct cells for the mask but extra edges and vertices because of how the transect is created.
# we have to do this after we create a mask with the cells. Otherwise, we don't know what side of the boundary
# is considered "inside the mask"

alg_edges, alg_vertices = mps.find_and_sort_transect_edges_and_vertices(mesh,dsMasks)

LS_test_transect_from_alg.geojson exists!
Opening LS_test_transect_from_alg.nc as dsMasks


In [7]:
nEdges_along_boundary = xr.DataArray(np.arange(0, alg_edges.size), dims=("nEdges_along_boundary", ))
ds_boundary = xr.Dataset(coords={
    "nEdges_along_boundary": nEdges_along_boundary,
    "nEdges_ID": xr.DataArray(alg_edges, dims=("nEdges_along_boundary",))
})

In [8]:
ds_boundary

In [9]:
nEdges_sorted = xr.DataArray(np.arange(0, alg_edges.size), dims=("nEdges_sorted", ))
sorted_boundary_ds = xr.Dataset(coords={
    "nEdges_sorted": nEdges_sorted,
    "xr_Edges_id": xr.DataArray(alg_edges, dims=("nEdges_sorted",))
})

In [10]:
sorted_boundary_ds

In [11]:
dss_cleaned = dss.isel(nEdges = sorted_boundary_ds.xr_Edges_id)

### get edge and vertex indices for a transect created from a mask

In [12]:
# open mask of desired region (this is to find transects from a pre-existing mask)

path = './'
filename = 'LS_test_transect_from_mask'

check_nc_existence = os.path.isfile(path + filename + '.nc')

# check if .nc mask file exists
if check_nc_existence == True:
    print(f'Opening {filename}.nc file as mask')
    mask = xr.open_dataset(path + filename + '.nc')
else: 
    print('Creating .nc file')
    check_geojson_existence = os.path.isfile(path + filename + '.geojson')

    # convert LS_test.geojson to LS_test.nc mask file
    if check_geojson_existence == True:
        print(f'Using {filename}.geojson to create .nc file')
        fcMask = read_feature_collection(path + filename + '.geojson')
        # pool = create_pool(process_count=8)
        dsMasks = compute_mpas_region_masks(mesh, fcMask, maskTypes =('cell',), pool=pool)
        dsMasks.to_netcdf(path + filename + '.nc', format='NETCDF4', mode='w')
        mask = xr.open_dataset(path + filename + '.nc')
        print(f'{filename}.nc created and opened as masks')
    else:
        print(f'{filename}.geojson does NOT exist!')

mask_edges, mask_vertices = mps.find_and_sort_transect_edges_and_vertices(mesh,mask)

Opening LS_test_transect_from_mask.nc file as mask


### Set up choice of edges and vertices based on method of creation.

In [13]:
method = 'alg' # 'mask'
if method == 'alg':
    edges, vertices = alg_edges, alg_vertices
    mask = dsMasks

if method == 'mask':
    edges, vertices = mask_edges, mask_vertices
    mask = mask

### A note on sign convention from normal velocity taken from [this python script on Github](https://github.com/milenaveneziani/analysis/blob/c7eb78deabb6fe9529d43fecdae6ce700eefff38/common_functions.py#L119) where 
`cellsOnEdge[nEdge,0]` is equivalent to my use of `A` cell, and \
`cellsonEdge[nEdge,1]` is equivalent to my use of `B` cell.

```ruby
# Compute edgeSigns so that transport INTO the region is positive. This is 
    # calculated considering that normalVelocity is positive when pointing from
    # cellsOnEdges[nEdge, 0] to cellsOnEdges[nEdge, 1]. Therefore, if 
    # cellsOnEdges[nEdge, 0] is outside the region and cellsOnEdges[nEdge, 1]
    # is inside the region, the sign remains unchanged (np.sign(regionMask[cellsOnEdges[nEdge, 1]] - 0.5)=1).
    # But, if cellsOnEdges[nEdge, 0] is inside the region and cellsOnEdges[nEdge, 1]
    # is outside the region, then the sign is flipped (np.sign(regionMask[cellsOnEdges[nEdge, 1]] - 0.5)
    # becomes -1. With this reasoning, the following also works (and we have verified that):
    # openBoundarySigns = -np.sign(regionMask[cellsOnEdges[~landEdges, 0]] - 0.5)
```

In [14]:
def calculate_velo_into_mask(ds, mesh, mask, outside_mask_values):
    ds['veloIntoMask'] = ds.timeMonthly_avg_normalVelocity * 0 
    
    # .isel the mesh to only get the sorted edges on the transect
    mesh_transect = mesh.isel(nEdges = dss_cleaned.nEdges_sorted.xr_Edges_id)
    
    # find transect edges on land
    xr_transect_edgesOnLand_TWO0 = mesh_transect.xr_Edges_id.where(np.isin(mesh_transect.cellsOnEdge.isel(TWO=0),0))
    xr_transect_edgesOnLand_TWO1 = mesh_transect.xr_Edges_id.where(np.isin(mesh_transect.cellsOnEdge.isel(TWO=1),0))
    xr_transect_edgesOnLand = np.union1d(xr_transect_edgesOnLand_TWO0, xr_transect_edgesOnLand_TWO1)
    
    # find transect edges on open ocean
    xr_transect_edgesOnOcean = np.setxor1d(edges, xr_transect_edgesOnLand)
    
    # select all the cells inside the mask
    xr_cells_inside, ignore_xr_inside_edges, ignore_xr_inside_vertices = mps.xr_inside_mask_info(mesh,mask)
    
    # of the open ocean edges, determine if the normal velocity is into or out of the mask
    
    # find the transect cells on open ocean edges
    n_transect_cellsOnOceanEdges = mesh_transect.cellsOnEdge
    xr_transect_cellsOnOceanEdges = mps.n_to_xr_idx(n_transect_cellsOnOceanEdges)
    
    # determine if the normal velocity points into or out of the mask
    for i in range(0,len(xr_transect_cellsOnOceanEdges)):
        for j in range(0,len(ds.xtime_startMonthly)):
            cellsOnSelectedEdge = xr_transect_cellsOnOceanEdges.isel(nEdges_sorted = i)
            selectedEdge = mesh_transect.nEdges_sorted.isel(nEdges_sorted = i)
            selectedMonth = ds.xtime_startMonthly.isel(xtime_startMonthly=j)
    
            if cellsOnSelectedEdge.isel(TWO=0).isin(xr_cells_inside): # if A is inside the mask
                ds.veloIntoMask.loc[dict(xtime_startMonthly = selectedMonth, nEdges_sorted = selectedEdge)] = ds.timeMonthly_avg_normalVelocity.loc[dict(xtime_startMonthly = selectedMonth, 
                                                                                                                                                         nEdges_sorted = selectedEdge)] * -1
    
            elif cellsOnSelectedEdge.isel(TWO=1).isin(xr_cells_inside): # if B is inside the mask
                ds.veloIntoMask.loc[dict(xtime_startMonthly = selectedMonth, nEdges_sorted = selectedEdge)] = ds.timeMonthly_avg_normalVelocity.loc[dict(xtime_startMonthly = selectedMonth, 
                                                                                                                                                         nEdges_sorted = selectedEdge)] * 1
    return ds, mesh_transect, xr_transect_cellsOnOceanEdges

In [15]:
ds = dss_cleaned
outside_mask_values = np.nan
ds_vIM, mesh_transect, xr_transect_cellsOnOceanEdges = calculate_velo_into_mask(ds, mesh,mask,outside_mask_values)

In [16]:
xr_cells_inside, xr_edges_inside, xr_vertices_inside = mps.xr_inside_mask_info(mesh,mask)

In [17]:
# calculate the area of the edge-layerThickness plane that the normal velocity moves through
# ------ INTERPOLATE LAYER THICKNESS ONTO NEDGES ------
# layerThickness sits on nCells, but veloIntoMask sits on nEdges
# we need to interpolate layerThickness onto nEdges by taking the average of the layerThickness of 2 cells that sit on the transect edge

# select only the layerThicknesses that are on cells that border the edges of the transect
cellsOnTransectEdge_layerThickness = ds_vIM.timeMonthly_avg_layerThickness.isel(nCells = xr_transect_cellsOnOceanEdges)

# take the mean of the layer thicknesses along the dimension "TWO" (take the mean of layer thickness between TWO=0 cell and TWO=1 cell)
interp_transectEdgeLayerThickness = cellsOnTransectEdge_layerThickness.mean(dim='TWO')

# ------ FIND THE WIDTH OF EACH TRANSECT EDGE ------
# find the dvEdge for all edges in transect
transectEdgeLength = mesh_transect.dvEdge

# ------ CALCULATE THE CROSS-SECTIONAL AREA OF THE TRANSECT BY MULTIPLYING THE LAYER THICKNESS BY THE EDGELENGTH ------
transect_area = interp_transectEdgeLayerThickness * transectEdgeLength

# ------ CALCLATE THE TRANSPORT INTO THE MASK ------
# multiply the cross-sectional area by teh veloIntoMask
transport = transect_area * ds_vIM.veloIntoMask

In [18]:
# create a new datavariable for transport, assign the transport to the corresponding edge
ds_vIM['transportIntoMask'] = transport

transportIntoMask_Sv = ds_vIM.transportIntoMask / 10**6

In [19]:
ds_fullMask = ds_vIM.isel(nCells = xr_cells_inside)

In [20]:
# add the corresponding max cell levels to the ds_vIM dataset
ds_fullMask['maxLevelCell'] = mesh.maxLevelCell.isel(nCells = xr_cells_inside)

In [21]:
def fillna_below(ds, coord, data_var):
    # make all values below last level (aka nans) equal to the value of the last non-nan level
    for i in range(0,len(ds[coord])):
        
        # define the slice of vertical levels that are nans
        nVertLevels_nans = slice(np.int32(ds.maxLevelCell[i]),None)
    
        # define the max vertical level
        maxLevelCell = np.int32(ds.maxLevelCell[i]) - 1
    
        # make the nan cells = the data variable at the max vertical level
        ds[data_var].loc[dict(nCells =i, nVertLevels = nVertLevels_nans)] = ds[data_var].loc[dict(nCells =i, nVertLevels = maxLevelCell)]
    
    return ds
    

In [22]:
new = fillna_below(ds_fullMask, 'nCells', 'timeMonthly_avg_potentialDensity')

In [23]:
ds_fullMask.timeMonthly_avg_potentialDensity.ffill(dim='nVertLevels',limit=None)

In [24]:
ds_vIM = ds_fullMask

In [27]:
ds_vIM

In [25]:
# now, we want to attach the potential density to the transport so we can bin transport by potential density class
cellsOnTransectEdge_sigma = ds_vIM.timeMonthly_avg_potentialDensity.isel(nCells = xr_transect_cellsOnOceanEdges)

# take the mean of the potential density along the dimension "TWO" (take the mean of sigma between TWO=0 cell and TWO=1 cell)
interp_transectEdgeSigma = cellsOnTransectEdge_sigma.mean(dim='TWO')

# reassign the interpolated sigma datavariable that now sit on the cell edge instead of cell center
ds_vIM['edgeSigma'] = interp_transectEdgeSigma

edgeSigma_mask = ds_vIM.edgeSigma

IndexError: index 202047 is out of bounds for axis 1 with size 296

In [None]:
edgeSigma_nadw = edgeSigma_mask > 1027.7
np.count_nonzero(edgeSigma_nadw.astype(int)==1)

nadw_transportMask_attempt = ds_vIM.transportIntoMask.where(edgeSigma_nadw ==True)

full_column_transport = transportIntoMask_Sv.sum(dim=['nVertLevels','nEdges_sorted']) 
nadw_column_transport = nadw_transportMask_attempt.sum(dim=['nVertLevels','nEdges_sorted']) / 10**6

display(full_column_transport.mean())
display(nadw_column_transport.mean())

In [None]:
np.arange(999,1028,0.25)

In [None]:
# create vertical xgcm grid
grid = Grid(ds_vIM, coords = {'Z':{'center':'nVertLevels'}},periodic=False,autoparse_metadata=False)

# define target values in density
# edgeSigma_target = np.linspace(1023,1028,100)
edgeSigma_target = np.arange(999,1028,0.25)

# transform from nVertLevels to edgeSigma for the vertical coordinate
transport_transformed = grid.transform(ds_vIM.transportIntoMask, 'Z', edgeSigma_target, target_data = ds_vIM.edgeSigma)
mean_transport = transport_transformed.sum('nEdges_sorted').mean(dim='xtime_startMonthly') / 10**6
mean_transport_reversed  = mean_transport[::-1] # reverse order of transport into mask (surface transport now at bottom)
cs_mean_transport_reversed = np.cumsum(mean_transport_reversed) # take cumsum from 0 to 60 where 0 index is the bottom sigma transport into mask (bottom=0 cumsum)
cs_mean_transport = cs_mean_transport_reversed[::-1] # reverse the order again so now 0 index is the surface (+below) sigma transport into mask (surface=0 cumsum)

In [None]:
plt.plot(cs_mean_transport,edgeSigma_target,marker = '.')
plt.gca().invert_yaxis()

In [None]:
transport_transformed = grid.transform(ds_vIM.transportIntoMask, 'Z', edgeSigma_target, target_data = ds_vIM.edgeSigma)

In [None]:
ds_vIM.edgeSigma.where(ds_vIM.edgeSigma >= 1014).isel(xtime_startMonthly=0, nEdges_sorted=44).plot()

In [None]:
ds_vIM.transportIntoMask.where(ds_vIM.transportIntoMask!=0).isel(xtime_startMonthly=0, nEdges_sorted=44).plot()

In [None]:
plt.scatter(
    ds_vIM.transportIntoMask.isel(xtime_startMonthly=0, nEdges_sorted=44),
    ds_vIM.edgeSigma.isel(xtime_startMonthly=0, nEdges_sorted=44)
)
plt.ylim(1026, 1028)

In [None]:
dso.dims

In [None]:
grid = Grid(
    ds_vIM.expand_dims({'nVertLevelsP1': np.arange(0, 61, 1)}),
    coords = {'Z':{'center':'nVertLevels', 'outer':'nVertLevelsP1'}},
    periodic=False,
    autoparse_metadata=False
)


transport_transformed_cons = grid.transform(ds_vIM.transportIntoMask, 'Z', edgeSigma_target, target_data = ds_vIM.edgeSigma, method="conservative")

In [None]:
transport_transformed_cons.isel(xtime_startMonthly=0, nEdges_sorted=44).plot()
plt.xlim(1026, 1028)

In [None]:
ds_vIM.edgeSigma.where(ds_vIM.edgeSigma >= 1014).isel(xtime_startMonthly=0, nEdges_sorted=44).plot()

In [None]:
transport_transformed.isel(xtime_startMonthly=0, nEdges_sorted=44).plot()

In [None]:
# create vertical xgcm grid
grid = Grid(ds_vIM, coords = {'Z':{'center':'nVertLevels'}},periodic=False,autoparse_metadata=False)

# define target values in density
# edgeSigma_target = np.linspace(1023,1028,100)
edgeSigma_target = np.arange(1020,1028,0.1)

# transform from nVertLevels to edgeSigma for the vertical coordinate
transport_transformed = grid.transform(ds_vIM.transportIntoMask, 'Z', edgeSigma_target, target_data = ds_vIM.edgeSigma)
mean_transport = transport_transformed.sum('nEdges_sorted').mean(dim='xtime_startMonthly') / 10**6
mean_transport_reversed  = mean_transport[::-1] # reverse order of transport into mask (surface transport now at bottom)
cs_mean_transport_reversed = np.cumsum(mean_transport_reversed) # take cumsum from 0 to 60 where 0 index is the bottom sigma transport into mask (bottom=0 cumsum)
cs_mean_transport = cs_mean_transport_reversed[::-1] # reverse the order again so now 0 index is the surface (+below) sigma transport into mask (surface=0 cumsum)

plt.plot(cs_mean_transport,edgeSigma_target,marker = '.')
plt.gca().invert_yaxis()

In [None]:
transport_neg = transport_transformed.where(transport_transformed < 0)
transport_pos = transport_transformed.where(transport_transformed > 0)

(transport_transformed.mean("xtime_startMonthly").sum("nEdges_sorted")*1e-6).plot()
(transport_neg.mean("xtime_startMonthly").sum("nEdges_sorted")*1e-6).plot()
(transport_pos.mean("xtime_startMonthly").sum("nEdges_sorted")*1e-6).plot()

In [None]:
transport_neg = transport_transformed.where(transport_transformed < 0)
transport_pos = transport_transformed.where(transport_transformed > 0)

np.cumsum(transport_transformed.mean("xtime_startMonthly").sum("nEdges_sorted")*1e-6).plot()
np.cumsum(transport_neg.mean("xtime_startMonthly").sum("nEdges_sorted")*1e-6).plot()
np.cumsum(transport_pos.mean("xtime_startMonthly").sum("nEdges_sorted")*1e-6).plot()

In [None]:
transport_transformed.mean('xtime_startMonthly').sum() / 10**6

In [None]:
(transport_transformed.sum('nEdges_sorted').mean('xtime_startMonthly') / 10**6).plot(y='edgeSigma')
plt.ylim(1020,1028)
plt.xlim(-0.15,0.05)
plt.gca().invert_yaxis()
plt.vlines(0,ymin=1020, ymax=1028,color='gray',linestyle='--')
plt.hlines(1027, xmin=-0.15, xmax=0.15,color='gray',linestyle='--')

In [None]:
(transport_transformed.mean('xtime_startMonthly').sum('nEdges_sorted') / 10**6).plot(y='edgeSigma')
plt.gca().invert_yaxis()


In [None]:
(transport_transformed.sum('nEdges_sorted').mean('xtime_startMonthly') / 10**6).plot(y='edgeSigma')
# plt.ylim(1020,1028)
plt.xlim(-0.15,0.05)
plt.gca().invert_yaxis()
plt.vlines(0,ymin=999, ymax=1028,color='gray',linestyle='--')
plt.hlines(1027, xmin=-0.15, xmax=0.15,color='gray',linestyle='--')

In [None]:
fig,ax = plt.subplots(nrows=1,ncols=1,constrained_layout=True,figsize=(12,7))
(ds_vIM.mean(dim='xtime_startMonthly')/10**6).transportIntoMask.plot(y='nVertLevels')
clevels = np.arange(1024,1028,0.5)
for_clabel = ds_vIM.mean(dim='xtime_startMonthly').edgeSigma.plot.contour(y='nVertLevels',levels=clevels,add_colorbar=True)
ax.clabel(for_clabel,colors='k')
ax.invert_yaxis()

In [None]:
ds_vIM.mean(dim='xtime_startMonthly').edgeSigma


In [None]:
plt.plot(cs_mean_transport, edgeSigma_target)
plt.gca().invert_yaxis()

In [None]:
mean_transport_reversed  = mean_transport[::-1] # reverse order of transport into mask (surface transport now at bottom)
cs_mean_transport_reversed = np.cumsum(mean_transport_reversed) # take cumsum from 0 to 60 where 0 index is the bottom sigma transport into mask (bottom=0 cumsum)
cs_mean_transport = cs_mean_transport_reversed[::-1] # reverse the order again so now 0 index is the surface (+below) sigma transport into mask (surface=0 cumsum)
cs_mean_transport

In [None]:
plt.plot(cs_mean_transport, edgeSigma_target)
plt.gca().invert_yaxis()

In [None]:
np.flipud(mean_transport.cumsum())

In [None]:
plt.plot(cs_mean_transport,edgeSigma_target)
# plt.gca().invert_yaxis()

In [None]:
# np.cumsum(transport.where(transport!=0).mean("xtime_startMonthly").sum("nEdges")*1e-6).plot()
# np.cumsum(transport_neg.where(transport_neg!=0).mean("xtime_startMonthly").sum("nEdges")*1e-6).plot()
# np.cumsum(transport_pos.where(transport_pos!=0).mean("xtime_startMonthly").sum("nEdges")*1e-6).plot()

In [None]:
# cellsOnTransectEdge_layerThickness.where(cellsOnTransectEdge_layerThickness!=0).isel(TWO=0).plot()

In [None]:
# cellsOnTransectEdge_layerThickness.where(cellsOnTransectEdge_layerThickness!=0).isel(TWO=1).plot()

In [None]:
# # create a new datavariable for transport, assign the transport to the corresponding edge
# dso['transportIntoMask'] = dso.veloIntoMask * 0 + np.nan

# for i in range(0,len(interp_transectEdgeLayerThickness)):
#     for j in range(0,len(dso.xtime_startMonthly)):
#         selectedEdge = transport.nEdges.isel(nEdges=i)
#         selectedMonth = dso.xtime_startMonthly[j]
    
#         dso.transportIntoMask.loc[dict(xtime_startMonthly=selectedMonth, nEdges = selectedEdge)] = transport.loc[dict(xtime_startMonthly = selectedMonth, nEdges = selectedEdge)]

# transportIntoMask_Sv = dso.transportIntoMask / 10**6

In [None]:
# # from the transect, create a mask to capture the entire region specified by the transects
# # this will also output lats and lons corresponding to test_verts
# filepath = './'
# filename = 'LS_test'
# geojson_file_name = 'Labrador Sea from transect algorithm'
# tags = "Labrador_Sea;Davis_Strait"
# author = "Kaila Uyeda"

# test_verts_lats, test_verts_lons, dsMasks = mps.transect_from_alg_create_nc(test_verts, 
#                                                                             mesh, 
#                                                                             filepath,
#                                                                             filename, 
#                                                                             geojson_file_name,
#                                                                             tags, 
#                                                                             author)

In [None]:
# net_transportIntoMask_Sv = transportIntoMask_Sv.sum(dim=['nEdges','nVertLevels'])
# net_transportIntoMask_Sv.mean(dim='xtime_startMonthly')

In [None]:
# transportIntoMask_Sv

In [None]:
# column_transport = dso.transportIntoMask.isel(xtime_startMonthly=0).sum(dim='nVertLevels')
# OSNAP_column_transport = column_transport.where(np.isin(mesh.nEdges, OSNAP_edges))
# OSNAP_column_transport_Sv = OSNAP_column_transport / 10**6

In [None]:
# non_OSNAP_column_transport = column_transport.where(~np.isin(mesh.nEdges, OSNAP_edges))
# non_OSNAP_column_transport = non_OSNAP_column_transport / 10**6

In [None]:
# PLOT JUST OSNAP
# fig,ax = plt.subplots(nrows=1,ncols=1,constrained_layout=True,figsize=(14,10))

# cellmask = dsMasks.regionCellMasks.isel(nRegions=0).astype(bool)
# b = mosaic.polypcolor(ax,descriptor,
#                   cellmask.astype(int),cmap='cool',alpha = 0.4,
#                   antialiaseds=False)


# c = mosaic.polypcolor(ax,descriptor,
#                   OSNAP_column_transport_Sv,cmap=cmo.balance,vmin=-10,vmax=10,
#                   antialiaseds=False)
# ax.set_xlim(290, 318)
# ax.set_ylim(51.5, 68)
# fig.colorbar(c, extend='both')
# ax.set_title('Net transport into mask through "OSNAP" array is ' + OSNAP_rounded_total_transport)
# # this is making me feel like I should flip the signs of the for loop?? 
# # But then this would mean there's a net increase of water in this region instead of export

In [None]:
# PLOT A SELECTED LEVEL
# fig,ax = plt.subplots(nrows=1,ncols=1,constrained_layout=True,figsize=(14,10))

# cellmask = dsMasks.regionCellMasks.isel(nRegions=0).astype(bool)
# b = mosaic.polypcolor(ax,descriptor,
#                   cellmask.astype(int),cmap='cool',alpha = 0.4,
#                   antialiaseds=False)


# c = mosaic.polypcolor(ax,descriptor,
#                   transportIntoMask_Sv.isel(Time=0,nVertLevels=20),cmap=cmo.balance,vmin=-0.05,vmax=0.05,
#                   antialiaseds=False)
# ax.set_xlim(290, 318)
# ax.set_ylim(51.5, 68)
# fig.colorbar(c, extend='both')
# # this is making me feel like I should flip the signs of the for loop?? 
# # But then this would mean there's a net increase of water in this region instead of export

In [None]:
# OSNAP_rounded_total_transport = str(np.round(OSNAP_column_transport_Sv.sum().values,decimals=3))
# str_net_transportIntoMask_Sv = str(np.round(net_transportIntoMask_Sv.values,decimals=3))
# str_nonOSNAP_transportIntoMask_Sv = str(np.round(non_OSNAP_column_transport.sum().values,decimals=3))

In [None]:

# fig,ax = plt.subplots(nrows=1,ncols=1,constrained_layout=True,figsize=(14,10))

# cellmask = dsMasks.regionCellMasks.isel(nRegions=0).astype(bool)
# b = mosaic.polypcolor(ax,descriptor,
#                   cellmask.astype(int),cmap='cool',alpha = 0.4,
#                   antialiaseds=False)

# c = mosaic.polypcolor(ax,descriptor,
#                   transportIntoMask_Sv.sum(dim='nVertLevels').mean(dim='xtime_startMonthly'),cmap=cmo.balance,vmin=-6,vmax=6,
#                   antialiaseds=False)


# ax.set_xlim(290, 318)
# ax.set_ylim(51.5, 68)
# fig.colorbar(c, extend='both')
# ax.set_title('Net transport into mask is ' + str_net_transportIntoMask_Sv + 'Sv \n OSNAP: ' + OSNAP_rounded_total_transport + ' & non-OSNAP: ' + str_nonOSNAP_transportIntoMask_Sv)
# # this is making me feel like I should flip the signs of the for loop?? 
# # But then this would mean there's a net increase of water in this region instead of export

```ruby
 'config_eos_linear_alpha': np.float64(0.2), 'config_eos_linear_beta': np.float64(0.8),
 'config_eos_linear_Tref': np.float64(5.0),
 'config_eos_linear_Sref': np.float64(35.0),
 'config_eos_linear_densityref': np.float64(1000.0),
 'config_eos_type': 'jm',
 ```
### [Fortran code for JM equation of state in MPAS-Ocean](https://github.com/kailauyeda/E3SM/blob/aab1d951cb7e6dcae737f8412109607efd25f2e3/components/mpas-ocean/src/shared/mpas_ocn_equation_of_state_jm.F#L17)
(unfortunately, I have no idea if there ie s a Python package that will calculate this...)

### to look at transport for a density bin that is greater than 1027.7 kg/m^3

In [None]:
# print('masked sigma max is 1027? That\'s it??')

In [None]:
# # we want to select only the potential density values greater than 1025
# # to do so, we need to interpolate potential density (on nCells) onto nEdges
# cellsOnTransectEdge_sigma = dso.timeMonthly_avg_potentialDensity.isel(nCells=xr_transect_cellsOnOceanEdges)

# # take the mean of the potential density along the dimension "TWO" (take the mean of sigma between TWO=0 cell and TWO=1 cell)
# interp_transectEdgeSigma = cellsOnTransectEdge_sigma.mean(dim='TWO')

# # reassign the interpolated sigma datavariable that now sit on the cell edge instead of cell center 
# # create a new datavariable for transport, assign the transport to the corresponding edge
# dso['edgeSigma'] = dso.transportIntoMask * 0 + np.nan

# dso.edgeSigma.loc[dict(xtime_startMonthly=dso.xtime_startMonthly, nEdges = interp_transectEdgeSigma.nEdges)] = interp_transectEdgeSigma.loc[dict(xtime_startMonthly=dso.xtime_startMonthly,nEdges = interp_transectEdgeSigma.nEdges)]
# # for i in range(0,len(interp_transectEdgeSigma)):
# #     selectedEdge = interp_transectEdgeSigma.nEdges.isel(nEdges=i)
    
# #     mesh.edgeSigma.loc[dict(Time=0, nEdges = selectedEdge)] = interp_transectEdgeSigma.loc[dict(nEdges = selectedEdge)]

# edgeSigma_mask = dso.edgeSigma#.isel(Time=0)

In [None]:
# edgeSigma_nadw = edgeSigma_mask > 1027.7
# np.count_nonzero(edgeSigma_nadw.astype(int)==1)

# nadw_transportMask_attempt = dso.transportIntoMask.where(edgeSigma_nadw ==True)

# full_column_transport = transportIntoMask_Sv.sum(dim='nVertLevels') 
# nadw_column_transport = nadw_transportMask_attempt.sum(dim='nVertLevels') / 10**6

In [None]:
# nadw_transportMask_attempt = dso.transportIntoMask.where(edgeSigma_nadw ==True)

In [None]:
# full_column_transport = transportIntoMask_Sv.sum(dim='nVertLevels') 
# nadw_column_transport = nadw_transportMask_attempt.sum(dim='nVertLevels') / 10**6

### to look at transport for all density bins

In [None]:
dso_mini = dso[['edgeSigma','transportIntoMask']].isel(nEdges = xr_transect_cellsOnOceanEdges.nEdges)

# create vertical xgcm grid
grid = Grid(dso, coords={'Z':{'center':'nVertLevels'}},periodic=False,autoparse_metadata=False)

# define target values in density
edgeSigma_target = np.linspace(999,1028,60)

# transform
transport_transformed = grid.transform(dso_mini.transportIntoMask, 'Z', edgeSigma_target, target_data = dso_mini.edgeSigma)
mean_transport = transport_transformed.sum('nEdges').mean(dim='xtime_startMonthly') / 10**6
cs_mean_transport = np.flipud(mean_transport.cumsum())
cs_mean_transport

In [None]:
plt.plot(cs_mean_transport,edgeSigma_target)
plt.gca().invert_yaxis()

In [None]:
# multiindex to allow for binning transport by density
# .isel for nEdges that are along the tansect edge...
dso_mini = dso[['edgeSigma','transportIntoMask']].isel(nEdges = xr_transect_cellsOnOceanEdges.nEdges)
dso_mini

# create arrays from the values of coordinates in the mini dso
nEdge_array = dso_mini.nEdges.values
nVertLevels_array = dso_mini.nVertLevels.values
xtime_startMonthly_array = dso.xtime_startMonthly.values

# there's probably a more efficient, clear way to do this, but I cannot think of one rn
# swap dims for edges and vertices, but preserve the time dimension
# then re-concatenate the datasets so that there are edgesigma for every time step (month)
stacked_dso_mini = dso_mini.stack(edges_vertices = ('nEdges','nVertLevels'))
ds_swapped_dims_list = []
for i in range(0,len(stacked_dso_mini.xtime_startMonthly)):
    ds_byMonth = stacked_dso_mini.isel(xtime_startMonthly=i).swap_dims({'edges_vertices':'edgeSigma'})
    ds_byMonth_expand_dims = ds_byMonth.expand_dims(dim='xtime_startMonthly')
    ds_swapped_dims_list.append(ds_byMonth_expand_dims)

ds_swapped_dims = xr.concat(ds_swapped_dims_list,dim='edgeSigma')

# for each of the density bins, calculate the average 
# I'm going to try to bin first and then average those bins... 
# say there's a bunch of dense water transport in DJF (more than the yearly average)
# if we were to take a yearly average first, then we wouldn't see this transport at that density bin (it'd be accounted for at a lower transport 
# where the lower transport is due to an average transport that is lower than the dense water transport...

# to bin the data by density (edgeSigma), we will use xgcm
grid = Grid(dso, coords={'Z':{'center':'nVertLevels'}},periodic=False,autoparse_metadata=False)

# define the target values in edgeSigma
edgeSigma_target = np.linspace(999,1028,60)

# # transform
# nVertLevels_transformed = grid.transform(ds_swapped_dims.edgeSigma, 'Z', edgeSigma_target, target_data = ds_swapped_dims.transportIntoMask)
# nVertLevels_transformed

In [None]:
dso

In [None]:
ds_swapped_dims

In [None]:
bin0 = list(binned_transport.groups.keys())[0]
print(binned_transport.apply(lambda x:x).sel(edgeSigma=bin0))

In [None]:
# multiindex to allow for binning transport by density
# .isel for nEdges that are along the tansect edge...
mesh_mini = mesh[['edgeSigma','transportIntoMask']].isel(Time=0,nEdges = xr_transect_cellsOnOceanEdges.nEdges)

# create arrays from the values of coordinates in the mini mesh
nEdge_array = mesh_mini.nEdges.values
nVertLevels_array = mesh_mini.nVertLevels.values

stacked_mesh_mini = mesh_mini.stack(edges_vertices = ('nEdges','nVertLevels'))
# stacked_mesh_mini.edges_vertices

# collect the transports and sigmas
transport_nadw = stacked_mesh_mini.transportIntoMask
sigma_nadw = stacked_mesh_mini.edgeSigma
# nadw_edges_vertices = transport_nadw.edges_vertices

# convert to datasets
transport_nadw_ds = transport_nadw.to_dataset(name='transportIntoMask')
sigma_nadw_ds = sigma_nadw.to_dataset(name='edgeSigma')

# merge the edgeSigma and transportIntoMask datasets to become one single dataset 
# just in case a .where would take too long
stacked_mesh_mini = xr.merge([transport_nadw_ds, sigma_nadw_ds])

# convert edgeSigma into a coordinate
transport_from_edgeSigma = stacked_mesh_mini.swap_dims({"edges_vertices":"edgeSigma"})
transport_from_edgeSigma

# bin transportIntoMask by edgeSigma values
sigma_bins = np.linspace(1024.4,1028,30)
grouped_transport = transport_from_edgeSigma.transportIntoMask.groupby_bins(transport_from_edgeSigma.edgeSigma,bins=sigma_bins)

# get the sum of transport for each density bin
grouped_transport_sum = grouped_transport.sum() / 10**6

In [None]:
# multiindex to allow for binning transport by density
# .isel for nEdges that are along the tansect edge...
mesh_mini = mesh[['edgeSigma','transportIntoMask']].isel(Time=0,nEdges = xr_transect_cellsOnOceanEdges.nEdges)

# create arrays from the values of coordinates in the mini mesh
nEdge_array = mesh_mini.nEdges.values
nVertLevels_array = mesh_mini.nVertLevels.values

stacked_mesh_mini = mesh_mini.stack(edges_vertices = ('nEdges','nVertLevels'))
# stacked_mesh_mini.edges_vertices

# collect the transports and sigmas
transport_nadw = stacked_mesh_mini.transportIntoMask
sigma_nadw = stacked_mesh_mini.edgeSigma
# nadw_edges_vertices = transport_nadw.edges_vertices

# convert to datasets
transport_nadw_ds = transport_nadw.to_dataset(name='transportIntoMask')
sigma_nadw_ds = sigma_nadw.to_dataset(name='edgeSigma')

# merge the edgeSigma and transportIntoMask datasets to become one single dataset 
# just in case a .where would take too long
stacked_mesh_mini = xr.merge([transport_nadw_ds, sigma_nadw_ds])

# convert edgeSigma into a coordinate
transport_from_edgeSigma = stacked_mesh_mini.swap_dims({"edges_vertices":"edgeSigma"})
transport_from_edgeSigma

# bin transportIntoMask by edgeSigma values
sigma_bins = np.linspace(1024.4,1028,30)
grouped_transport = transport_from_edgeSigma.transportIntoMask.groupby_bins(transport_from_edgeSigma.edgeSigma,bins=sigma_bins)

# get the sum of transport for each density bin
grouped_transport_sum = grouped_transport.sum() / 10**6

In [None]:
transport_from_edgeSigma.edgeSigma.max()

In [None]:
grouped_transport

In [None]:
len(sigma_bins[1:])

In [None]:
grouped_transport_sum = grouped_transport.sum() / 10**6
grouped_transport_cumsum_denseToLight = np.flipud(grouped_transport_sum.cumsum())
grouped_transport_cumsum_denseToLight

In [None]:
grouped_transport_sum.cumsum()

In [None]:
grouped_transport_cumsum_denseToLight

In [None]:
grouped_transport_sum

In [None]:
# plot this 
fig,ax = plt.subplots(nrows=1,ncols=1,constrained_layout=True)
ax.plot(grouped_transport_cumsum_denseToLight, sigma_bins[1:])
ax.invert_yaxis()
ax.axvline(x=0,ls='--',color='gray')
ax.set_title('Transport of water into (+) and out of (-) masked LS region')
ax.set_ylabel('potential density')
ax.set_xlabel('Transport (Sv)')