# Generate Monthly CO2 Solubility from Daily SST and SSS

In [1]:
import glob
import os.path
import pprint

import distributed
import ncar_jobqueue
import numpy as np
import xarray as xr
import yaml

from utils import time_set_mid, path_replace

  from distributed.utils import tmpfile


In [2]:
# Set xarray's global keep_attrs option so that attributes are retained on the
# results of xarray operations; the trailing ';' suppresses the cell's output.
xr.set_options(keep_attrs=True);

In [3]:
# Read the mapping of GCB experiment names to their CESM case lists, then
# pretty-print it so the notebook records exactly what will be processed.
with open("GCB_metadata.yaml", "r") as metadata_file:
    GCB_metadata = yaml.safe_load(metadata_file)

pprint.pprint(GCB_metadata)

{'A': {'cases': ['g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.001',
                 'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.002']},
 'B': {'cases': ['g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BCRC.001']},
 'C': {'cases': ['g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRC.001']},
 'D': {'cases': ['g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BCRD.001',
                 'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BCRD.002']}}


In [4]:
# Root of the GCB_2022 project tree. Per-case time-series files are looked up
# under {tseries_root}/{case}/output/ocn/proc/tseries/ by gen_CO2_alpha_driver.
tseries_root = "/glade/campaign/cesm/development/bgcwg/projects/GCB_2022"

In [5]:
def gen_CO2_alpha_driver(GCB_name, CESM_cases):
    """Generate monthly CO2 solubility files for every case of a GCB experiment.

    For each case, the daily SST and SSS time-series files found under
    ``{tseries_root}/{case}/output/ocn/proc/tseries/day_1`` are paired in
    sorted order. For each pair, daily CO2 solubility is computed with
    ``gen_CO2_alpha``, reduced to monthly means with ``gen_ds_monthly``, and
    written to the path that ``path_replace`` derives from the SST path
    (``day_1`` -> ``month_1``, ``h.nday1`` -> ``h``, ``SST`` -> ``CO2_alpha``).

    Parameters
    ----------
    GCB_name : str
        Experiment label; it is only printed.
    CESM_cases : iterable of str
        CESM case names to process.

    Raises
    ------
    ValueError
        If a case's SST and SSS file lists differ in length, or if a paired
        SST/SSS file differ in the part of the file name that follows the
        variable name. Both checks run before any file of that case is
        processed, so a mismatch never yields partially mispaired output.
    """
    print(GCB_name)
    print(CESM_cases)

    for case in CESM_cases:
        # named day_dir (not dir) so the builtin is not shadowed
        day_dir = f"{tseries_root}/{case}/output/ocn/proc/tseries/day_1"
        tos_prefix = f"{case}.pop.h.nday1.SST."
        sos_prefix = f"{case}.pop.h.nday1.SSS."
        tos_paths = sorted(glob.glob(f"{day_dir}/{tos_prefix}*.nc"))
        sos_paths = sorted(glob.glob(f"{day_dir}/{sos_prefix}*.nc"))

        # zip() would silently truncate to the shorter list, so verify up front
        # that the two lists describe the same set of files.
        if len(tos_paths) != len(sos_paths):
            raise ValueError(
                f"{case}: found {len(tos_paths)} SST files but "
                f"{len(sos_paths)} SSS files in {day_dir}"
            )
        for tos_path, sos_path in zip(tos_paths, sos_paths):
            tos_tail = os.path.basename(tos_path)[len(tos_prefix):]
            sos_tail = os.path.basename(sos_path)[len(sos_prefix):]
            if tos_tail != sos_tail:
                raise ValueError(
                    f"{case}: SST file {tos_path} does not pair with "
                    f"SSS file {sos_path}"
                )

        if not tos_paths:
            print(f"no SST/SSS files found in {day_dir}, skipping {case}")
            continue

        for tos_path, sos_path in zip(tos_paths, sos_paths):
            # Context managers close both input files once the monthly file
            # has been written, instead of leaking handles across the loop.
            with xr.open_dataset(tos_path, chunks={"time": 365}) as ds_sst, \
                    xr.open_dataset(sos_path, chunks={"time": 365}) as ds_sss:
                ds_sst["SSS"] = ds_sss["SSS"]
                ds = time_set_mid(ds_sst, "time")

                ds_daily = ds.drop_vars(["SST", "SSS"])
                ds_daily["CO2_alpha"] = gen_CO2_alpha(ds)
                ds_monthly = gen_ds_monthly(ds_daily, ds_daily["CO2_alpha"])

                # ensure NaN _FillValues do not get generated
                # (Dataset.variables already includes the coordinate variables)
                for var in ds_monthly.variables:
                    ds_monthly[var].encoding.setdefault("_FillValue", None)

                CO2_alpha_path = path_replace(
                    tos_path,
                    dir_replace={"day_1": "month_1"},
                    base_replace={"h.nday1": "h", "SST": "CO2_alpha"},
                    date_trunc_len=6,
                )

                # The write must happen inside the with-block: the data are
                # dask-backed and are only read from the inputs at this point.
                print(f"writing {CO2_alpha_path}")
                ds_monthly.to_netcdf(CO2_alpha_path)


def gen_CO2_alpha(ds):
    """Return CO2 solubility computed from ``ds["SST"]`` and ``ds["SSS"]``.

    Implements Eq. 12 of Weiss, R. F.: Carbon dioxide in water and seawater:
    the solubility of a non-ideal gas, Mar. Chem., 2, 203-215, 1974, using the
    coefficients from the last column of Table 1.

    SST is treated as degrees Celsius (273.15 is added to obtain Kelvin).
    The result carries units "mol kg-1 atm-1", long_name "CO2 Solubility",
    and the encoding of ``ds["SST"]``.
    """
    # temperature-only coefficients, then salinity-dependent coefficients
    a1, a2, a3 = -60.2409, 93.4517, 23.3585
    b1, b2, b3 = 0.023517, -0.023656, 0.0047036

    # The fit is expressed in T/100, i.e. Kelvin divided by 100.
    temp_scaled = 0.01 * (ds["SST"] + 273.15)
    temp_scaled_inv = 1.0 / temp_scaled
    salinity = ds["SSS"]

    # ln(alpha) = temperature polynomial + salinity * (quadratic in T/100)
    ln_alpha_temp = a1 + a2 * temp_scaled_inv + a3 * np.log(temp_scaled)
    ln_alpha_salt = salinity * (b1 + temp_scaled * (b2 + temp_scaled * b3))
    alpha = np.exp(ln_alpha_temp + ln_alpha_salt)

    alpha.attrs.update({"units": "mol kg-1 atm-1", "long_name": "CO2 Solubility"})
    alpha.encoding = ds["SST"].encoding

    return alpha


def gen_ds_monthly(ds, da):
    """Build a monthly-mean Dataset for ``da`` using metadata taken from ``ds``.

    The time values are assumed to be mid-interval so that ``resample``
    assigns every sample to the intended month, and the data are assumed to
    be daily, so a plain (unweighted) mean is used.

    The returned Dataset holds the monthly mean of ``da``, monthly time
    bounds built from the bounds variable named by
    ``ds["time"].attrs["bounds"]``, a time coordinate set to the upper bound
    of each month, and every variable of ``ds`` that has no "time" dimension.
    Attributes and encodings are copied over from the corresponding inputs.
    """
    # monthly mean of the field itself, keeping its on-disk encoding
    monthly_mean = da.resample(time="M").mean()
    monthly_mean.encoding = da.encoding

    # monthly bounds: earliest lower bound and latest upper bound of each month
    bounds_name = ds["time"].attrs["bounds"]
    daily_bounds = ds[bounds_name]
    month_start = daily_bounds[:, 0].resample(time="M").min()
    month_end = daily_bounds[:, 1].resample(time="M").max()
    monthly_bounds = xr.concat([month_start, month_end], daily_bounds.dims[-1])
    monthly_bounds = monthly_bounds.transpose()
    monthly_bounds.attrs = ds[bounds_name].attrs
    monthly_bounds.encoding = ds[bounds_name].encoding

    # assemble the Dataset and carry over dataset-level metadata
    ds_monthly = xr.Dataset({bounds_name: monthly_bounds, da.name: monthly_mean})
    ds_monthly.encoding = ds.encoding
    ds_monthly.attrs = ds.attrs
    if "time_period_freq" in ds_monthly.attrs:
        ds_monthly.attrs["time_period_freq"] = "month_1"

    # time is set to end-of-month, to be compatible with other POP h files
    end_of_month = monthly_bounds[:, 1]
    ds_monthly = ds_monthly.assign_coords({"time": end_of_month})

    # the new time coordinate inherits the source time metadata
    ds_monthly["time"].attrs = ds["time"].attrs
    ds_monthly["time"].encoding = ds["time"].encoding

    # copy across everything that does not vary in time
    for name in ds.variables:
        if "time" in ds[name].dims:
            continue
        ds_monthly[name] = ds[name]

    return ds_monthly


# Obtain Computational Resources

In [6]:
# Set up a dask cluster through ncar_jobqueue: each job gets one core, 4GB of
# memory, a single worker process, and a one hour wall-clock limit.
cluster = ncar_jobqueue.NCARCluster(
    cores=1,
    memory="4GB",
    processes=1,
    walltime="01:00:00",
)

# Scale the cluster to 8, then attach a client so later dask work runs on it.
cluster.scale(8)
client = distributed.Client(cluster)

# Display the client summary (including the dashboard link) as the cell output.
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/klindsay/GCB_2022/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/klindsay/GCB_2022/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.48:40041,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/klindsay/GCB_2022/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [7]:
# Run the CO2 solubility driver for every experiment in the metadata file.
for GCB_name, GCB_entry in GCB_metadata.items():
    gen_CO2_alpha_driver(GCB_name, GCB_entry["cases"])

A
['g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.001', 'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.002']
writing /glade/campaign/cesm/development/bgcwg/projects/GCB_2022/g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.001/output/ocn/proc/tseries/month_1/g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.001.pop.h.CO2_alpha.177801-180212.nc
writing /glade/campaign/cesm/development/bgcwg/projects/GCB_2022/g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.001/output/ocn/proc/tseries/month_1/g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.001.pop.h.CO2_alpha.180301-182712.nc
writing /glade/campaign/cesm/development/bgcwg/projects/GCB_2022/g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.001/output/ocn/proc/tseries/month_1/g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.001.pop.h.CO2_alpha.182801-185212.nc
writing /glade/campaign/cesm/development/bgcwg/projects/GCB_2022/g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.GCB_2022.BDRD.001/outpu

# Release Computational Resources

In [8]:
# Close the dask client first, then the cluster it was connected to.
client.close()
cluster.close()
