In [1]:
import xarray as xr
import numpy as np
import yaml
import os
import pop_tools
import esmlab
import esmlab_regrid
esmlab.config.set({'regrid.gridfile-directory' : 'scrip_grids/'})

%matplotlib inline

Open all 12 files in `/glade/work/mclong/seawifs/chl_gsm`

```
$ ls -1
S19972442010273.L3m_MC_GSM_chl_gsm_9km.nc
S19972742010304.L3m_MC_GSM_chl_gsm_9km.nc
S19973052010334.L3m_MC_GSM_chl_gsm_9km.nc
S19973352010365.L3m_MC_GSM_chl_gsm_9km.nc
S19980012010031.L3m_MC_GSM_chl_gsm_9km.nc
S19980322010059.L3m_MC_GSM_chl_gsm_9km.nc
S19980602010090.L3m_MC_GSM_chl_gsm_9km.nc
S19980912010120.L3m_MC_GSM_chl_gsm_9km.nc
S19981212010151.L3m_MC_GSM_chl_gsm_9km.nc
S19981522010181.L3m_MC_GSM_chl_gsm_9km.nc
S19981822010212.L3m_MC_GSM_chl_gsm_9km.nc
S19982132010243.L3m_MC_GSM_chl_gsm_9km.nc
```

Note that, in order, these are

```
Sept (1997 - 2010)
Oct (1997 - 2010)
Nov (1997 - 2010)
Dec (1997 - 2010)
Jan (1998 - 2010)
Feb (1998 - 2010)
March (1998 - 2010)
April (1998 - 2010)
May (1998 - 2010)
June (1998 - 2010)
July (1998 - 2010)
Aug (1998 - 2010)
```

In [2]:
# Read mapping_dict from YAML
with open('mapping_dict.yaml') as yaml_file:
    mapping_dict = yaml.safe_load(yaml_file)

dict_of_datasets = dict()
for month in mapping_dict:
    file_loc = os.path.join(os.path.sep, 'glade', 'work', 'mclong', 'seawifs', 'chl_gsm', mapping_dict[month])
    dict_of_datasets[month] = xr.open_dataset(file_loc)


In [3]:
'''
netcdf POP_gx1v7 {
dimensions:
    grid_size = 122880 ;
    grid_rank = 2 ;
    grid_corners = 4 ;
variables:
    int grid_dims(grid_rank) ;
    double grid_center_lat(grid_size) ;
        grid_center_lat:units = "radians" ;
    double grid_center_lon(grid_size) ;
        grid_center_lon:units = "radians" ;
    double grid_area(grid_size) ;
    int grid_imask(grid_size) ;
        grid_imask:units = "unitless" ;
    double grid_corner_lat(grid_size, grid_corners) ;
        grid_corner_lat:units = "radians" ;
    double grid_corner_lon(grid_size, grid_corners) ;
        grid_corner_lon:units = "radians" ;

'''

def gen_scrip_grid(ds_in, var_for_mask):
    """
        Takes lat, lon information from seaWIFS dataset and creates a SCRIP grid file
        NOTES:
        1. using "degrees_east" and "degrees_north" instead of just "degrees" caused ESMF issues
        2. I have not yet computed grid_area correctly
    """
    # Pull grid out of dataset
    lats = ds_in['lat'].data
    nlat = len(lats)
    lons = ds_in['lon'].data
    nlon = len(lons)
    
    # Define lat_bnds and lon_bnds (to convert to corners)
    lat_bnds = np.empty_like(lats, shape=(nlat, 2))
    lat_bnds[0, 0] = 90.
    lat_bnds[1:, 0] = 0.5*(lats[1:] + lats[:-1])
    lat_bnds[:-1, 1] = 0.5*(lats[1:] + lats[:-1])
    lat_bnds[-1, 1] = -90.
    lon_bnds = np.empty_like(lons, shape=(nlon, 2))
    lon_bnds[0, 0] = -180.
    lon_bnds[1:, 0] = 0.5*(lons[1:] + lons[:-1])
    lon_bnds[:-1, 1] = 0.5*(lons[1:] + lons[:-1])
    lon_bnds[-1, 1] = 180.

    # Define lat_corners and lon_corners
    lat_corners = np.empty_like(lats, shape=(nlat, 4))
    lat_corners[:, 0] = lat_bnds[:, 1] # SW
    lat_corners[:, 1] = lat_bnds[:, 1] # SE
    lat_corners[:, 2] = lat_bnds[:, 0] # NE
    lat_corners[:, 3] = lat_bnds[:, 0] # NW

    lon_corners = np.empty_like(lons, shape=(nlon, 4))
    lon_corners[:, 0] = lon_bnds[:, 0] # SW
    lon_corners[:, 1] = lon_bnds[:, 1] # SE
    lon_corners[:, 2] = lon_bnds[:, 1] # NE
    lon_corners[:, 3] = lon_bnds[:, 0] # NW

    # Define dimensions for xarray
    grid_size = nlat*nlon
    grid_rank = 2
    grid_dims = [nlon, nlat]

    # Set up local copies of data to add to scrip dataset
    lats_repeat = np.repeat(lats[:,np.newaxis], nlon, axis=1).reshape(nlat*nlon)
    lons_repeat = np.repeat(lons[np.newaxis,:], nlat, axis=0).reshape(nlat*nlon)
    grid_imask = np.where(np.isnan(ds_in[var_for_mask].data), 0, 1).reshape(nlat*nlon)
    lat_corners_repeat = np.repeat(lat_corners[:,np.newaxis,:], nlon, axis=1).reshape((nlat*nlon, 4))
    lon_corners_repeat = np.repeat(lon_corners[np.newaxis,:,:], nlat, axis=0).reshape((nlat*nlon, 4))

    # compute grid_area
    s_lat = lat_corners_repeat[:,0]*np.pi/180 # SW
    n_lat = lat_corners_repeat[:,2]*np.pi/180 # NE
    w_lon = lon_corners_repeat[:,0]*np.pi/180 # SW
    e_lon = lon_corners_repeat[:,2]*np.pi/180 # NE
    grid_area = (np.sin(n_lat) - np.sin(s_lat))*(e_lon - w_lon)

    # Need to create scrip_ds
    scrip_ds = xr.Dataset()
    scrip_ds['grid_dims'] = xr.DataArray(grid_dims, dims='grid_rank')
    scrip_ds['grid_center_lon'] = xr.DataArray(lons_repeat, dims='grid_size')
    scrip_ds['grid_center_lat'] = xr.DataArray(lats_repeat, dims='grid_size')
    scrip_ds['grid_imask'] = xr.DataArray(grid_imask, dims='grid_size')
    scrip_ds['grid_corner_lon'] = xr.DataArray(lon_corners_repeat, dims=('grid_size', 'grid_corners'))
    scrip_ds['grid_corner_lat'] = xr.DataArray(lat_corners_repeat, dims=('grid_size', 'grid_corners'))
    scrip_ds['grid_area'] = xr.DataArray(grid_area, dims='grid_size')

    # Set units
    scrip_ds['grid_center_lon'].attrs['units'] = 'degrees'
    scrip_ds['grid_center_lat'].attrs['units'] = 'degrees'
    scrip_ds['grid_corner_lon'].attrs['units'] = 'degrees'
    scrip_ds['grid_corner_lat'].attrs['units'] = 'degrees'
    scrip_ds['grid_area'].attrs['units'] = 'radians^2'

    return scrip_ds

In [4]:
scrip_in = gen_scrip_grid(dict_of_datasets['Jan'], 'chl_gsm')
# I think I need to add grid corners?
outfile = 'scrip_grids/seaWIFS.nc'
os.system(f'rm -f {outfile}')
scrip_in.to_netcdf('scrip_grids/seaWIFS.nc')

In [5]:
np.sum(scrip_in['grid_area'].data)

12.566379

In [6]:
# TODO: want conservative map rather than bilinear

# Note: I ended up building this file in parallel using the CIME mapping tools
# $ qcmd -- ./create_ESMF_map.sh -fsrc /glade/work/mlevy/codes/cesm2-marbl/notebooks/chl_regrid/scrip_grids/seaWIFS.nc \
#                                -fdst /glade/work/mlevy/codes/cesm2-marbl/notebooks/chl_regrid/scrip_grids/POP_gx1v7.nc \
#                                -nsrc seaWIFS -ndst POP_gx1v7 -map aave
# which lead to
# $ mpirun -np 36 ESMF_RegridWeightGen --ignore_unmapped -m bilinear -w map_seaWIFS_TO_POP_gx1v7_blin.200205.nc \
#                                      -s /glade/work/mlevy/codes/cesm2-marbl/notebooks/chl_regrid/scrip_grids/seaWIFS.nc \
#                                      -d /glade/work/mlevy/codes/cesm2-marbl/notebooks/chl_regrid/scrip_grids/POP_gx1v7.nc
# followed by
# $ cp map_seaWIFS_TO_POP_gx1v7_aave.200205.nc /glade/work/mlevy/codes/cesm2-marbl/notebooks/chl_regrid/scrip_grids/weights/seaWIFS_to_POP_gx1v7_conservative.nc
regrid_operator = esmlab_regrid.regridder(name_grid_src='seaWIFS',
                                          name_grid_dst='POP_gx1v7', 
                                          method='conservative',
                                          overwrite_existing=False)


In [7]:
%%time

list_of_mapped_das = []
for month in dict_of_datasets:
    print(f'Interpolating data from {month}')
    list_of_mapped_das.append(regrid_operator(dict_of_datasets[month]['chl_gsm'], renormalize=True, apply_mask=True).rename({'lat' : 'nlat', 'lon' : 'nlon'}))

Interpolating data from Jan
Interpolating data from Feb
Interpolating data from Mar
Interpolating data from Apr
Interpolating data from May
Interpolating data from Jun
Interpolating data from Jul
Interpolating data from Aug
Interpolating data from Sep
Interpolating data from Oct
Interpolating data from Nov
Interpolating data from Dec
CPU times: user 4.25 s, sys: 2.2 s, total: 6.44 s
Wall time: 6.91 s


In [8]:
# 1) Create template for dataset on POP grid
#    Note that I remove all variables on vertical grid and dual grid, then promote TLAT and TLONG to coordinates
ds_new = pop_tools.get_grid(grid_name='POP_gx1v7').drop(['dz', 'z_t', 'z_w', 'z_w_bot', 'KMT', 'ULAT', 'ULONG']).set_coords(['TLAT', 'TLONG'])

# 2) Concatenate remapped data arrays over new dimension named time
#    TODO: figure out best values for time (currently an array of str)
ds_new['chl_gsm'] = xr.concat(list_of_mapped_das, dim='month')
ds_new['month'] = [month for month in dict_of_datasets]

# 3) Clean up metadata -- remove unneeded global attributes as well as coordinates attribute from all variables that have it
del(ds_new.attrs['region_mask_regions'])
del(ds_new.attrs['topography_fname'])
for var in ds_new.variables:
    if 'coordinates' in ds_new[var].attrs:
        del(ds_new[var].attrs['coordinates'])

# 4) Enforce valid_min and valid_max
var = 'chl_gsm'
if 'valid_min' in ds_new[var].attrs:
    ds_new[var].data = np.where(ds_new[var].data < ds_new[var].attrs['valid_min'], ds_new[var].attrs['valid_min'], ds_new[var].data)
    del(ds_new[var].attrs['valid_min'])
if 'valid_max' in ds_new[var].attrs:
    ds_new[var].data = np.where(ds_new[var].data > ds_new[var].attrs['valid_max'], ds_new[var].attrs['valid_max'], ds_new[var].data)
    del(ds_new[var].attrs['valid_max'])

# 5) Print resulting dataset to screen
print(ds_new)
print(f"Min value of chl_gsm: {np.nanmin(ds_new['chl_gsm'].data)}")
print(f"Max value of chl_gsm: {np.nanmax(ds_new['chl_gsm'].data)}")

# 6) Generate netCDF file
#    TODO: come up with a better file name
ds_new.to_netcdf('seaWIFS.chl_gsm.mon_climo.Sep1997_Dec2010.nc')

<xarray.Dataset>
Dimensions:      (month: 12, nlat: 384, nlon: 320)
Coordinates:
    TLAT         (nlat, nlon) float64 -79.22 -79.22 -79.22 ... 72.2 72.19 72.19
    TLONG        (nlat, nlon) float64 320.6 321.7 322.8 ... 318.9 319.4 319.8
  * month        (month) <U3 'Jan' 'Feb' 'Mar' 'Apr' ... 'Sep' 'Oct' 'Nov' 'Dec'
Dimensions without coordinates: nlat, nlon
Data variables:
    DXT          (nlat, nlon) float64 2.339e+06 2.339e+06 ... 1.473e+06
    DYT          (nlat, nlon) float64 5.94e+06 5.94e+06 ... 5.046e+06 5.046e+06
    TAREA        (nlat, nlon) float64 1.39e+13 1.39e+13 ... 7.431e+12 7.432e+12
    REGION_MASK  (nlat, nlon) int32 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
    chl_gsm      (month, nlat, nlon) float64 nan nan nan nan ... nan nan nan nan
Attributes:
    lateral_dims:       [384, 320]
    vertical_dims:      60
    vert_grid_file:     /glade/work/mlevy/miniconda3/envs/cesm2-marbl/lib/pyt...
    horiz_grid_fname:   /glade/p/cesmdata/cseg/inputdata/ocn/pop/gx1v7/gr

