In [1]:
from datetime import date, datetime, timezone

import cftime
import git
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr

import config
import util

In [2]:
# Provenance string written into generated files:
# <repo>/tree/<commit sha>/<path of this notebook within the repo>

script_fname = 'Fe_aeolian_dep/gen_cesm2_omip_presaero.ipynb'
repo = 'github.com/marbl-ecosys/marbl-forcing'

# hash of the commit at HEAD of the enclosing git working tree
sha = git.Repo(search_parent_directories=True).head.object.hexsha

id_string = '/'.join((repo, 'tree', sha, script_fname))

In [3]:
# input files

dirin = f'{config.inputdata}/atm/cam/chem/trop_mozart_aero/aero'
fname_hist = (
    f'{dirin}/'
    'aerosoldep_WACCM.ensmean_monthly_hist_1849-2015_0.9x1.25_CMIP6_c180926.nc'
)
print(fname_hist)

# date and datesec are not used by streams, so leave them out when reading
drop_vars = ('date', 'datesec')
ds_hist = xr.open_dataset(fname_hist, drop_variables=drop_vars)

# years of the first and last entries on the time axis
time_vals_hist = ds_hist.time.values
yr_range_hist = (time_vals_hist[0].year, time_vals_hist[-1].year)
print(yr_range_hist)

/glade/p/cesmdata/cseg/inputdata/atm/cam/chem/trop_mozart_aero/aero/aerosoldep_WACCM.ensmean_monthly_hist_1849-2015_0.9x1.25_CMIP6_c180926.nc
(1849, 2015)


In [4]:
# details on file being generated

# creation date (2-digit year, month, day) used in the "c<datestamp>" part of output file names
datestamp = f'{date.today():%y%m%d}'

def yr_start_cycle0_fosi(yr_range_fosi, cycle_cnt_fosi):
    """Return the first year of the earliest cycle of a repeated forcing period.

    The forcing period yr_range_fosi = (first year, last year), both inclusive,
    is repeated cycle_cnt_fosi times back to back, with the final repetition
    starting at yr_range_fosi[0].  The value returned is the year in which the
    first repetition starts.
    """
    first_yr = yr_range_fosi[0]
    last_yr = yr_range_fosi[1]
    cycle_len = last_yr - first_yr + 1
    # every cycle before the final one shifts the start back by one full cycle
    cycles_before_last = cycle_cnt_fosi - 1
    return first_yr - cycles_before_last * cycle_len

def yr_range_fmt(yr_range):
    """Format the first two entries of yr_range as 'YYYY-YYYY', each zero-padded to width 4."""
    yr_lo, yr_hi = yr_range[0], yr_range[1]
    return '-'.join(f'{yr:04d}' for yr in (yr_lo, yr_hi))

# forcing periods of the two OMIP protocols, and the first year of their
# earliest cycle when each period is repeated 6 times
yr_range_omip1 = (1948, 2009)
yr_range_omip2 = (1958, 2018)
yr_start_cycle0_omip1 = yr_start_cycle0_fosi(yr_range_omip1, cycle_cnt_fosi=6)
yr_start_cycle0_omip2 = yr_start_cycle0_fosi(yr_range_omip2, cycle_cnt_fosi=6)

# file preceding the historical file: from one year before the earliest
# cycle start up to the year before the historical file begins
yr_lo_omip = min(yr_start_cycle0_omip1, yr_start_cycle0_omip2) - 1
yr_range_prehist = (yr_lo_omip, yr_range_hist[0] - 1)
fname_prehist = (
    f'aerosoldep_WACCM.ensmean_{yr_range_hist[0]:04d}vals_'
    f'{yr_range_fmt(yr_range_prehist)}_CMIP6_c{datestamp}.nc'
)

# file following the historical file: from the year after the historical
# file ends up to one year past the latest OMIP end year
yr_hi_omip = max(yr_range_omip1[1], yr_range_omip2[1]) + 1
yr_range_posthist = (yr_range_hist[1] + 1, yr_hi_omip)
fname_posthist = (
    f'aerosoldep_WACCM.ensmean_{yr_range_hist[1]:04d}vals_'
    f'{yr_range_fmt(yr_range_posthist)}_CMIP6_c{datestamp}.nc'
)

In [5]:
# construct time values for new datasets

def time_vars(yr_range, time_units):
    """Construct monthly time and time-bounds variables on a noleap calendar.

    yr_range is (first year, last year), both inclusive; time_units is the
    units string used to encode the values and is stored as the 'units'
    attribute of the time variable.  Month lengths are added in days, so
    time_units is assumed to be a "days since ..." string -- TODO confirm.

    Returns (time_var, time_bnds_var): time_var holds the midpoint of each
    month, time_bnds_var holds the (start, end) edges of each month with
    dims ('time', 'd2').
    """
    calendar = 'noleap'
    month_lengths = np.array([31.0, 28.0, 31.0, 30.0, 31.0, 30.0,
                              31.0, 31.0, 30.0, 31.0, 30.0, 31.0])

    n_years = yr_range[1] - yr_range[0] + 1

    # month edges measured from the start of the first year, then shifted so
    # they are expressed relative to the reference date in time_units
    edges = np.concatenate(([0.0], np.cumsum(np.tile(month_lengths, n_years))))
    edges += cftime.date2num(cftime.DatetimeNoLeap(yr_range[0], 1, 1),
                             time_units, calendar=calendar)

    time_bnds_vals = np.column_stack((edges[:-1], edges[1:]))
    time_vals = time_bnds_vals.mean(axis=1)

    time_attrs = {'long_name':'time', 'units':time_units,
                  'calendar':calendar, 'bounds':'time_bnds'}
    time_var = xr.DataArray(time_vals, dims='time', coords={'time':time_vals},
                            attrs=time_attrs)
    time_bnds_var = xr.DataArray(time_bnds_vals, dims=('time', 'd2'),
                                 coords={'time':time_var})

    return time_var, time_bnds_var

# reuse the time units that the historical file was encoded with
time_units_hist = ds_hist.time.encoding['units']

time_var_prehist, time_bnds_var_prehist = time_vars(yr_range_prehist, time_units_hist)
time_var_posthist, time_bnds_var_posthist = time_vars(yr_range_posthist, time_units_hist)

In [6]:
# construct new dataset, preserving grid and domain variables from ds_hist

def gen_ds_out(time_var, time_bnds_var, yr_hist):
    """Build a dataset that repeats one year of ds_hist along a new time axis.

    For every data variable of the module-level ds_hist that has both 'lat'
    and 'lon' dimensions, the monthly values of year yr_hist are selected and
    repeated once per year of time_var.  lon and lat are taken from ds_hist.

    Parameters
    ----------
    time_var : xr.DataArray
        Time coordinate of the output; its length must be a multiple of 12.
    time_bnds_var : xr.DataArray
        Time bounds matching time_var.
    yr_hist : int
        Year of ds_hist whose values are repeated.

    Returns
    -------
    xr.Dataset
        Dataset with 'history' and 'input_file_list' global attributes set
        from the module-level id_string and fname_hist.

    Raises
    ------
    ValueError
        If time_var does not span a whole number of years, or if the
        selection of yr_hist from ds_hist does not contain exactly 12 entries.
    """
    months_per_yr = 12

    nyrs, leftover = divmod(len(time_var.values), months_per_yr)
    if leftover != 0:
        raise ValueError(
            f'length of time_var ({len(time_var.values)}) is not a multiple of {months_per_yr}')

    ds_out = xr.Dataset({'time': time_var, 'time_bnds': time_bnds_var,
                         'lon': ds_hist.lon, 'lat': ds_hist.lat})

    # label-based slices include both end points, so an entry stamped exactly
    # at the start of the following year would also be selected; the size
    # check below reports that case explicitly
    time_sel = slice(f'{yr_hist:04d}-01-01', f'{(yr_hist+1):04d}-01-01')

    for varname, var_in in ds_hist.data_vars.items():
        if 'lat' in var_in.dims and 'lon' in var_in.dims:
            var_in_slice = var_in.sel(time=time_sel)
            nmonths = var_in_slice.sizes['time']
            if nmonths != months_per_yr:
                raise ValueError(
                    f'{varname}: selected {nmonths} time entries for year {yr_hist}, '
                    f'expected {months_per_yr}')

            # repeat along the time axis only, wherever it sits in the
            # variable's dimension order and whatever the variable's rank
            reps = [1] * var_in_slice.ndim
            reps[var_in_slice.dims.index('time')] = nyrs
            var_out_vals = np.tile(var_in_slice.values, reps)

            var_out = xr.DataArray(var_out_vals, dims=var_in.dims,
                                   coords={'time':time_var, 'lat':ds_hist.lat, 'lon':ds_hist.lon})
            var_out.attrs = var_in.attrs
            ds_out[varname] = var_out

    # named so that it does not shadow the module-level datestamp, which uses
    # a different format
    created_on = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    ds_out.attrs['history'] = f'created by {id_string} on {created_on}'
    ds_out.attrs['input_file_list'] = fname_hist

    return ds_out

# years before the historical file: repeat the first historical year
ds_out = gen_ds_out(time_var_prehist, time_bnds_var_prehist, yr_hist=yr_range_hist[0])
ds_out_clean = util.ds_clean(ds_out)
ds_out_clean.to_netcdf(fname_prehist, unlimited_dims='time')

# years after the historical file: repeat the last historical year
ds_out = gen_ds_out(time_var_posthist, time_bnds_var_posthist, yr_hist=yr_range_hist[1])
ds_out_clean = util.ds_clean(ds_out)
ds_out_clean.to_netcdf(fname_posthist, unlimited_dims='time')