Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

C-iTRACE tracer pointing to NetCDF4 files #230

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions recipes/citrace_tracer/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Pangeo Forge metadata for the C-iTRACE tracer recipe.
title: 'C-iTRACE-tracers'
description: 'Coarse resolution, ocean-only simulation from 22ka to present using the isotope-enabled ocean component of CESM1'
pangeo_forge_version: '0.9.0'
pangeo_notebook_version: '2022.06.02'
# Recipes provided by this directory; `object` names the `recipe` object in recipe.py.
recipes:
- id: citrace_tracer
object: 'recipe:recipe'
# Where the data come from and under which license they are distributed.
provenance:
providers:
- name: 'University of Colorado Boulder'
description: 'University of Colorado Boulder'
roles:
- producer
url: https://sites.google.com/colorado.edu/citrace
- name: 'University of Wisconsin-Madison'
description: 'University of Wisconsin-Madison'
roles:
- producer
url: https://sites.google.com/colorado.edu/citrace
- name: 'Ohio State University'
description: 'Ohio State University'
roles:
- producer
url: https://sites.google.com/colorado.edu/citrace
- name: 'UCAR/NCAR'
description: 'University Corporation for Atmospheric Research/National Center for Atmospheric Research'
roles:
- curator
- licensor
url: https://gdex.ucar.edu/dataset/204_ajahn.html
license: 'Creative Commons Attribution 4.0 International'
# People responsible for keeping this recipe working.
maintainers:
- name: 'Jordan Landers'
orcid: '0000-0001-9772-7617'
github: jordanplanders
# Bakery selected to run the recipe.
bakery:
id: 'pangeo-ldeo-nsf-earthcube'
92 changes: 92 additions & 0 deletions recipes/citrace_tracer/recipe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from pangeo_forge_recipes.patterns import ConcatDim, FilePattern, MergeDim
from pangeo_forge_recipes.recipes.xarray_zarr import XarrayZarrRecipe

# Tracer quantities ingested from C-iTRACE. These names are the keys of the
# 'variable' merge dimension and of the URL table in make_url().
variables = [
    'd18O',
    'ABIO_D14Cocn',
    'ABIO_D14Catm',
    'CISO_DIC_d13C',
    'ND143',
    'ND144',
    'PD',
    'SALT',
    'TEMP',
    'IAGE',
]


def make_url(time, variable):
    """Return the figshare download URL for one C-iTRACE tracer variable.

    Args:
        time: Key along the 'time' dimension. It is accepted so the function
            can be called with both dimension keys, but it is not used: the
            URL depends only on ``variable``.
        variable: Tracer name; must be a key of the table below.

    Returns:
        The download URL string registered for ``variable``.

    Raises:
        KeyError: If ``variable`` has no entry in the table.
    """
    download_urls = {
        'd18O': 'https://figshare.com/ndownloader/files/38530430',
        'ABIO_D14Cocn': 'https://figshare.com/ndownloader/files/38231892',
        'ABIO_D14Catm': 'https://figshare.com/ndownloader/files/38231991',
        'CISO_DIC_d13C': 'https://figshare.com/ndownloader/files/38526806',
        'ND143': 'https://figshare.com/ndownloader/files/38232651',
        'ND144': 'https://figshare.com/ndownloader/files/38232060',
        'PD': 'https://figshare.com/ndownloader/files/38543969',
        'SALT': 'https://figshare.com/ndownloader/files/38541851',
        'TEMP': 'https://figshare.com/ndownloader/files/38543534',
        'IAGE': 'https://figshare.com/ndownloader/files/38534591',
    }
    return download_urls[variable]


# Each variable is published as one large (~6 GB) file that spans the whole
# time series, so the 'time' concat dimension has a single key and the files
# are combined only along the 'variable' merge dimension.
time_concat_dim = ConcatDim(name='time', keys=[0])
pattern = FilePattern(
    make_url,
    time_concat_dim,
    MergeDim(name='variable', keys=variables),
)


# Clean up the dataset so that lat and lon are included as dimension coordinates.
def postproc(ds):
    """Rebuild one input chunk with 1-D ``lat``/``lon`` dimension coordinates.

    The returned dataset contains a single data variable (the first one in
    ``ds`` whose name does not contain ``'bound'``) on the dimensions
    ``(time, [z_t,] lat, lon)``, sorted by ``lon`` and then ``lat``.

    Args:
        ds: Opened input dataset. It must have a ``time`` coordinate and
            coordinates whose lower-cased names contain ``'lat'`` and
            ``'lon'``. Those two are indexed as 2-D arrays here; presumably
            they are (lat, lon)-shaped grids -- TODO confirm against the
            source files.

    Returns:
        A new ``xarray.Dataset``. Only the coordinate values and the data
        values are carried over.

    Raises:
        IndexError: If no data variable or no lat/lon coordinate matches.
    """
    # Kept as a function-local import, as in the original.
    # NOTE(review): presumably so the function stays self-contained when the
    # recipe runner executes it elsewhere -- confirm before hoisting.
    import xarray as xr

    variable = [name for name in ds.data_vars if 'bound' not in name][0]
    data = ds[variable]

    # Map 'lat'/'lon' to the first coordinate whose name contains that string
    # (case-insensitive), whatever the source file calls it.
    coord_names = list(ds.coords)
    coord_d = {
        axis: [name for name in coord_names if axis in name.lower()][0]
        for axis in ('lat', 'lon')
    }

    # Collapse the 2-D coordinates to 1-D: latitude from the first column,
    # longitude from the first row (wrapped with % 360).
    # NOTE(review): this is only representative if the grid is rectilinear --
    # confirm for this dataset.
    lats = ds.coords[coord_d['lat']].values[:, 0]
    lons = ds.coords[coord_d['lon']][0].values % 360

    _ds = xr.Dataset()
    _ds.coords['lat'] = ('lat', lats)
    _ds.coords['lon'] = ('lon', lons)
    _ds.coords['time'] = ('time', ds.coords['time'].values)

    # Label a depth axis only when the file has a z_t coordinate AND the data
    # are actually 4-D. Checking the coordinate alone would try to attach four
    # dimension names to a 3-D field if a file carries z_t next to a
    # (time, lat, lon) variable.
    if 'z_t' in ds.coords and data.ndim == 4:
        _ds.coords['z_t'] = ('z_t', ds.coords['z_t'].values)
        coord_tuple = ('time', 'z_t', 'lat', 'lon')
    else:
        coord_tuple = ('time', 'lat', 'lon')

    # NOTE(review): only the raw values are copied, so the variable's attrs
    # are not carried into the output -- confirm that this is intended.
    _ds[variable] = (coord_tuple, data.values)

    return _ds.sortby(['lon', 'lat'])


# Build the recipe object.
# `subset_inputs` breaks each input up along time so that processing stays
# tractable. Per the recipe author, setting `target_chunks` here instead of
# `nitems_per_file` on the ConcatDim massively decreased run time.
recipe = XarrayZarrRecipe(
    pattern,
    process_chunk=postproc,
    inputs_per_chunk=1,
    subset_inputs={'time': 120},
    target_chunks={'time': 1},
    consolidate_zarr=True,
    copy_input_to_local_file=False,
    xarray_open_kwargs={'decode_coords': True, 'use_cftime': True, 'decode_times': True},
)