# Generate Requested Remapped 2D Fields

In [1]:
import datetime
import glob
import math
import pprint

import numpy as np
import xarray as xr
import yaml

from ocean_remap import ocean_remap
from utils import time_set_mid
from utils_units import clean_units, conv_units

In [2]:
# Set xarray's global keep_attrs option so that attributes are retained
# through xarray operations; the trailing semicolon suppresses the cell's
# output repr.
xr.set_options(keep_attrs=True);

In [3]:
# Read the GCB metadata file. As the printed output shows, it maps each GCB
# simulation name to a dict whose "cases" entry lists CESM case names.
with open("GCB_metadata.yaml", mode="r") as fptr:
    GCB_metadata = yaml.safe_load(fptr)
pprint.pprint(GCB_metadata)

{'A': {'cases': ['g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.001',
                 'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.002']},
 'B': {'cases': ['g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BCRC.001']},
 'C': {'cases': ['g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRC.001']},
 'D': {'cases': ['g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BCRD.001',
                 'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BCRD.002']}}


In [4]:
# Root directory under which each case's time series files are looked up.
tseries_root = "/glade/campaign/cesm/development/bgcwg/projects/GCB_2022"
# Directory beneath which the remapped output files are written, one
# subdirectory per GCB simulation name.
submission_dir = f"{tseries_root}/submission"

In [5]:
def gen_single_var_ds(CESM_cases, gcomp, freq, scomp, stream, varname, isel_dict=None):
    """Open one variable's time series files, across one or more cases, as a single Dataset.

    Parameters
    ----------
    CESM_cases : iterable of str
        Case names. Files are collected case by case in the order given, and
        sorted by path within each case.
    gcomp, freq, scomp, stream : str
        Pieces of the time series directory and file names, e.g.
        "ocn", "month_1", "pop", "h".
    varname : str
        Name of the variable whose files are opened.
    isel_dict : dict, optional
        Positional indexers applied with ``Dataset.isel``. For every entry
        whose value is an ``int``, the variable named by the key is dropped
        from the result afterwards.

    Returns
    -------
    The result of ``time_set_mid(ds, "time")`` applied to the combined Dataset.

    Raises
    ------
    FileNotFoundError
        If no file matches for any of the cases.
    """
    paths = []
    patterns = []
    for case in CESM_cases:
        tseries_dir = f"{tseries_root}/{case}/output/{gcomp}/proc/tseries/{freq}"
        pattern = f"{tseries_dir}/{case}.{scomp}.{stream}.{varname}.*.nc"
        patterns.append(pattern)
        paths.extend(sorted(glob.glob(pattern)))

    # fail early with the patterns that were searched, instead of an opaque
    # error from open_mfdataset or an IndexError on paths[0] below
    if not paths:
        raise FileNotFoundError(f"no files found for {varname}; searched {patterns}")

    kwargs = {
        "compat": "override",
        "data_vars": "minimal",
        "coords": "minimal",
    }

    ds = xr.open_mfdataset(paths, **kwargs)

    if isel_dict is not None:
        ds = ds.isel(isel_dict)
        for key, value in isel_dict.items():
            if isinstance(value, int):
                ds = ds.drop_vars(key)

    # copy metadata not propagated by open_mfdataset from 1st file;
    # the context manager closes the extra file handle once the copy is done
    with xr.open_dataset(paths[0]) as ds0:
        if "unlimited_dims" in ds0.encoding:
            ds.encoding["unlimited_dims"] = ds0.encoding["unlimited_dims"]
        ds["time"].encoding = ds0["time"].encoding

    # remove CESM specific variable attributes (tolerate files that lack it)
    ds[varname].attrs.pop("grid_loc", None)

    return time_set_mid(ds, "time")

In [6]:
# Recipe for each output field, keyed by the variable name used in the output
# file. Entries are listed in the order the fields appeared in the notebook.
#   src         : CESM variable read from the time series files
#   surface     : select index 0 along z_t and prefix long_name with "Surface "
#   area_corr   : value passed to remap_2d as apply_area_corr
#   clean_units : pass the units attribute through clean_units
#   units       : target units passed to conv_units (absent: no conversion)
_REMAP_2D_FIELDS = {
    "fgco2": {"src": "FG_CO2", "area_corr": True, "units": "mol m-2 s-1"},
    "sfco2": {"src": "fCO2"},
    "fice": {"src": "IFRAC", "clean_units": True},
    "dissicos": {"src": "DIC", "surface": True, "units": "mol m-3"},
    "talkos": {"src": "ALK", "surface": True, "clean_units": True, "units": "mol m-3"},
    "tos": {"src": "TEMP", "surface": True},
    "sos": {"src": "SALT", "surface": True},
    "intdic": {"src": "DIC_zint", "area_corr": True, "units": "mol m-2"},
}


def remap_2d_driver(GCB_name, CESM_cases, matrix, fields=("sfco2",)):
    """Remap and write the requested monthly 2D fields for one GCB simulation.

    Parameters
    ----------
    GCB_name : str
        GCB simulation name; forwarded to remap_2d_write.
    CESM_cases : list of str
        CESM case names whose time series files are read.
    matrix
        Remapping object forwarded to remap_2d.
    fields : iterable of str, optional
        Output field names to generate, processed in the order given. Valid
        names are the keys of ``_REMAP_2D_FIELDS``. The default generates only
        "sfco2", which is what this function effectively did before the field
        list became selectable.

    Raises
    ------
    ValueError
        If ``fields`` contains a name with no recipe; raised before any file
        is read or written.
    """
    print(GCB_name)
    print(CESM_cases)

    fields = list(fields)
    unknown = [name for name in fields if name not in _REMAP_2D_FIELDS]
    if unknown:
        raise ValueError(
            f"unknown field(s) {unknown}; valid names are {list(_REMAP_2D_FIELDS)}"
        )

    (yr_lo, yr_hi) = (1959, 2021)
    time_slice = slice(f"{yr_lo:4}-01-01", f"{(yr_hi+1):4}-01-01")

    for out_name in fields:
        spec = _REMAP_2D_FIELDS[out_name]
        src_name = spec["src"]
        surface = spec.get("surface", False)

        isel_dict = {"z_t": 0} if surface else None
        ds_in = gen_single_var_ds(
            CESM_cases, "ocn", "month_1", "pop", "h", src_name, isel_dict
        ).sel(time=time_slice)

        da = remap_2d(ds_in, src_name, matrix, apply_area_corr=spec.get("area_corr", False))
        da.name = out_name

        # attribute fix-ups happen before unit conversion, matching the order
        # used by the original per-field code
        if surface:
            da.attrs["long_name"] = "Surface " + da.attrs["long_name"]
        if spec.get("clean_units", False):
            da.attrs["units"] = clean_units(da.attrs["units"])
        if "units" in spec:
            da = conv_units(da, spec["units"])

        remap_2d_write(GCB_name, ds_in, da, yr_lo, yr_hi)


def remap_2d(ds, varname, matrix, apply_area_corr=False):
    """Remap one variable of ``ds`` to the destination grid of ``matrix``.

    Parameters
    ----------
    ds : xarray.Dataset
        Source dataset. Must contain ``varname`` and ``time``; when
        ``apply_area_corr`` is True it must also contain ``KMT``, ``TAREA``
        and ``radius``.
    varname : str
        Name of the variable to remap.
    matrix
        Remapping object providing ``remap_var`` plus ``src_grid`` and
        ``dst_grid`` descriptions (``area``, ``frac``, ``dims``, ``lat``, ``lon``).
    apply_area_corr : bool, optional
        If True, scale the source values by the ratio of the model's cell area
        (``TAREA / radius**2`` where ``KMT > 0``) to the source grid area held
        by ``matrix`` (``area * frac``) before remapping.

    Returns
    -------
    xarray.DataArray
        Remapped values on (time, lat, lon), NaN where the destination grid
        fraction is not positive, carrying the source variable's attributes
        and its encoding without the "coordinates" entry.
    """
    da_src = ds[varname]

    # remapping works on plain arrays; missing values are zero filled first
    vals_src = da_src.fillna(0).values

    if apply_area_corr:
        src_grid = matrix.src_grid
        src_area_rad2 = (src_grid.area * src_grid.frac).reshape(src_grid.dims)
        POP_area_cm2 = xr.where(ds["KMT"] > 0, ds["TAREA"], 0.0).fillna(0).values
        rearth = ds["radius"].values
        # NOTE(review): where src_area_rad2 is 0 this is 0/0 -> NaN; presumably
        # those source cells carry no remapping weight -- confirm
        mdl2src = POP_area_cm2 / rearth**2 / src_area_rad2
        vals_src *= mdl2src

    # use the matrix that was passed in, not a notebook-level global
    vals_dst = matrix.remap_var(vals_src)

    dst_grid = matrix.dst_grid
    vals_dst = np.where(dst_grid.frac.reshape(dst_grid.dims) > 0, vals_dst, np.nan)

    da_dst = xr.DataArray(
        vals_dst,
        coords={"time": ds["time"], "lat": dst_grid.lat, "lon": dst_grid.lon},
        # state the dimension order explicitly rather than relying on it being
        # inferred from the ordering of the coords dict
        dims=("time", "lat", "lon"),
        attrs=da_src.attrs,
    )
    da_dst.encoding = dict(da_src.encoding)
    # the source "coordinates" encoding does not apply on the destination
    # grid; it may legitimately be absent
    da_dst.encoding.pop("coordinates", None)
    da_dst["lat"].attrs = {"long_name": "latitude", "units": "degrees_north"}
    da_dst["lon"].attrs = {"long_name": "longitude", "units": "degrees_east"}

    return da_dst


def remap_2d_write(GCB_name, ds_in, da, yr_lo, yr_hi):
    """Write a remapped field, with its time axis and time bounds, to a netCDF file.

    Parameters
    ----------
    GCB_name : str
        GCB simulation name; used for the output subdirectory and file name.
    ds_in : xarray.Dataset
        Dataset the field was derived from. Supplies ``time``, the variable
        named by ``time.attrs["bounds"]``, and the dataset encoding.
    da : xarray.DataArray
        Remapped field; ``da.name`` becomes the output variable name, the
        ``variable_id`` attribute and the leading part of the file name.
    yr_lo, yr_hi : int
        First and last year, used only to build the file name.

    The file is written to
    ``{submission_dir}/{GCB_name}/{da.name}_CESM2_{GCB_name}_1_gr_{yr_lo}01-{yr_hi}12_v{YYYYMMDD}.nc``;
    the directory is created if it does not exist.
    """
    import os  # local import: not part of the notebook's import cell

    # sample the clock once so the creation_date attribute and the file name
    # datestamp cannot disagree when a run straddles midnight
    now = datetime.datetime.now()

    tb_name = ds_in["time"].attrs["bounds"]
    ds_out = xr.Dataset({"time": ds_in["time"], tb_name: ds_in[tb_name], da.name: da})
    ds_out.encoding = ds_in.encoding
    ds_out.attrs["source_id"] = "CESM2"
    ds_out.attrs["institution_id"] = "NCAR"
    ds_out.attrs["variable_id"] = da.name
    ds_out.attrs["contact"] = "klindsay@ucar.edu"
    ds_out.attrs["creation_date"] = now.strftime("%Y-%m-%d")

    # ensure NaN _FillValues do not get generated
    # (ds_out.variables already includes the coordinate variables)
    for var in ds_out.variables:
        ds_out[var].encoding.setdefault("_FillValue", None)

    datestamp = now.strftime("%Y%m%d")

    timestring = f"{yr_lo:4}01-{yr_hi:4}12"
    out_dir = f"{submission_dir}/{GCB_name}"
    os.makedirs(out_dir, exist_ok=True)
    path = f"{out_dir}/{da.name}_CESM2_{GCB_name}_1_gr_{timestring}_v{datestamp}.nc"
    print(f"writing remapped monthly field to {path}")
    ds_out.to_netcdf(path)

In [7]:
# Construct the remapping object from the named file, then run the driver
# once per GCB simulation listed in the metadata.
matrix_2d_fname = "POP_gx1v7_to_latlon_1x1_0E_conserve_20180914.nc"
matrix_2d = ocean_remap(matrix_2d_fname)

for GCB_name, GCB_info in GCB_metadata.items():
    remap_2d_driver(GCB_name, GCB_info["cases"], matrix_2d)

A
['g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.001', 'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.002']
writing remapped monthly field to /glade/campaign/cesm/development/bgcwg/projects/GCB_2022/submission/A/sfco2_CESM2_A_1_gr_195901-202112_v20220712.nc
B
['g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BCRC.001']
writing remapped monthly field to /glade/campaign/cesm/development/bgcwg/projects/GCB_2022/submission/B/sfco2_CESM2_B_1_gr_195901-202112_v20220712.nc
C
['g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRC.001']
writing remapped monthly field to /glade/campaign/cesm/development/bgcwg/projects/GCB_2022/submission/C/sfco2_CESM2_C_1_gr_195901-202112_v20220712.nc
D
['g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BCRD.001', 'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BCRD.002']
writing remapped monthly field to /glade/campaign/cesm/development/bgcwg/projects/GCB_2022/submission/D/sfco2_CESM2_D_1_gr_195901-202112_v20220712.nc
