In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import xesmf as xe

In [2]:
# Month-end timestamps for every month of the study period.
# `end="2017-04"` resolves to 2017-04-01, so the last month-end generated is 2017-03-31.
# The offset object is used instead of the "1M" string alias: the alias is deprecated in
# recent pandas releases (renamed "ME"), whereas MonthEnd() is accepted by old and new
# versions alike and yields the same index.
study_timeperiod = pd.date_range(start="2014-09", end="2017-04", freq=pd.offsets.MonthEnd())
print(f"Start: {study_timeperiod[0]}, End: {study_timeperiod[-1]}")

Start: 2014-09-30 00:00:00, End: 2017-03-31 00:00:00


# Construct input datasets

## MOZART

In [3]:
# One MOZART history file per month of the study period; the year and month of each
# timestamp select both the output directory and the file name.
file_paths = [
    f"../1_transport/intermediates/MOZART/output/BasisFnsUpdated/{yyyy}{mm}/"
    f"BasisFnsUpdated.mz4.h0.{yyyy}-{mm}-01-03600.nc"
    for yyyy, mm in (month.strftime("%Y-%m").split("-") for month in study_timeperiod)
]


In [4]:
def prepare_mozart_input(ds):
    """Decode the time axis of one MOZART file and return its CO2 surface emissions.

    Used as the ``preprocess`` hook of ``xr.open_mfdataset`` with
    ``decode_times=False``: the timestamps are rebuilt here from the ``date``
    (parsed as YYYYMMDD) and ``datesec`` (seconds added to that date) variables.

    Parameters
    ----------
    ds : dataset-like
        Object exposing ``date``, ``datesec`` and ``CO2_SRF_EMIS_avrg``.
        Its ``time`` entry is overwritten in place.

    Returns
    -------
    The ``CO2_SRF_EMIS_avrg`` variable of ``ds``.
    """
    calendar_day = pd.to_datetime(ds["date"].values, format="%Y%m%d")
    seconds_into_day = pd.to_timedelta(ds["datesec"].values, unit="seconds")
    ds["time"] = calendar_day + seconds_into_day
    # only the CO2 emissions array is needed downstream
    return ds["CO2_SRF_EMIS_avrg"]


# Open all monthly MOZART diagnostics as one dataset and pull out CO2_SRF_EMIS_avrg.
# xarray's own time decoding is switched off because the preprocess hook rebuilds the
# time axis for each file before the files are combined.
with xr.open_mfdataset(
    file_paths,
    decode_times=False,
    preprocess=prepare_mozart_input,
) as ds:
    da_mozart = ds["CO2_SRF_EMIS_avrg"]

# one observation for every hour of every day in study period, in units kg/m^2/s
da_mozart


Unnamed: 0,Array,Chunk
Bytes,1.17 GiB,39.23 MiB
Shape,"(22632, 96, 144)","(744, 96, 144)"
Count,93 Tasks,31 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.17 GiB 39.23 MiB Shape (22632, 96, 144) (744, 96, 144) Count 93 Tasks 31 Chunks Type float32 numpy.ndarray",144  96  22632,

Unnamed: 0,Array,Chunk
Bytes,1.17 GiB,39.23 MiB
Shape,"(22632, 96, 144)","(744, 96, 144)"
Count,93 Tasks,31 Chunks
Type,float32,numpy.ndarray


In [5]:
# da_mozart.to_netcdf("../data/mozart_inputs.nc")

In [6]:
# End of the study period: the latest timestamp present in the MOZART emissions.
mozart_datetime_end = da_mozart["time"].values.max()

## GEOS-Chem

In [7]:
def extract_co2_emission_geoschem(ds):
    """Return the ``EmisCO2_Total`` variable of ``ds``.

    Used as the ``preprocess`` hook of ``xr.open_mfdataset`` so that only this
    variable is carried through when the HEMCO diagnostics files are combined.
    """
    total_co2_emissions = ds["EmisCO2_Total"]
    return total_co2_emissions


geos_chem_path_glob = (
    "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/HEMCO_diagnostics.*.nc"
)
with xr.open_mfdataset(geos_chem_path_glob, preprocess=extract_co2_emission_geoschem) as ds:
    # compute the sum over the pressure levels
    da_geoschem = ds["EmisCO2_Total"].sum(dim=["lev"])

    # clip to available mozart date range.
    # A label-based slice selects the same timestamps as
    # `where(time <= mozart_datetime_end, drop=True)` but is a pure selection: it does not
    # add an element-wise masking step over the whole (lazy) array.
    da_geoschem = da_geoschem.sel(time=slice(None, mozart_datetime_end))

# one observation for every hour of every day in study period, in units kg/m^2/s
da_geoschem


Unnamed: 0,Array,Chunk
Bytes,1.10 GiB,37.19 MiB
Shape,"(22632, 91, 144)","(744, 91, 144)"
Count,363 Tasks,31 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.10 GiB 37.19 MiB Shape (22632, 91, 144) (744, 91, 144) Count 363 Tasks 31 Chunks Type float32 numpy.ndarray",144  91  22632,

Unnamed: 0,Array,Chunk
Bytes,1.10 GiB,37.19 MiB
Shape,"(22632, 91, 144)","(744, 91, 144)"
Count,363 Tasks,31 Chunks
Type,float32,numpy.ndarray


In [8]:
# da_geoschem.to_netcdf("../data/geoschem_inputs.nc")

# Construct output datasets

In [9]:
# Common 1x1 degree grid that both model outputs are regridded onto:
# 180 latitudes from -89.5 to 89.5 and 360 longitudes from -180 to 179.
lat_points = np.arange(-89.5, 90.5, 1.0)
lon_points = np.arange(-180, 180, 1.0)
target_grid = xr.Dataset({"lat": (["lat"], lat_points), "lon": (["lon"], lon_points)})
target_grid

## MOZART

In [10]:
with xr.open_mfdataset(file_paths, decode_times=False) as ds:
    # setup times correctly: rebuild the time axis from the integer date (YYYYMMDD)
    # and the seconds-of-day offset, since xarray's time decoding is disabled
    ds["time"] = pd.to_datetime(ds["date"].values, format="%Y%m%d") + pd.to_timedelta(
        ds["datesec"].values, unit="seconds"
    )
    ds_mozart = ds[["CO2_VMR_avrg", "P0", "PS", "hyai", "hybi"]]
    # compute pressure edge from the hybrid coefficients: P0 * hyai + PS * hybi
    # (the division by 100 is presumably a Pa -> hPa conversion; confirm units of P0/PS)
    ds_mozart["pressure_edge"] = (
        ds_mozart["P0"] * ds_mozart["hyai"] + ds_mozart["PS"] * ds_mozart["hybi"]
    ) / 100
    # Shift longitude coordinate reference to [-180, 180).
    # The labels are wrapped (modulo 360) and the columns re-sorted, rather than
    # subtracting 180 from every label: a plain subtraction keeps the data in place but
    # relabels each column 180 degrees away from its original longitude.
    # NOTE(review): assumes the files store true longitudes in [0, 360) - confirm.
    ds_mozart = ds_mozart.assign_coords(
        lon=((ds_mozart["lon"] + 180.0) % 360.0) - 180.0
    ).sortby("lon")
    # regrid to 1x1 degree; periodic=True treats the global source grid as wrapping in
    # longitude so target columns beyond the last source longitude are still interpolated
    ds_mozart = xe.Regridder(ds_mozart, target_grid, "bilinear", periodic=True)(ds_mozart)

# one observation for every hour of every day in study period, in units VMR, regridded
ds_mozart


Unnamed: 0,Array,Chunk
Bytes,305.95 GiB,10.06 GiB
Shape,"(22632, 56, 180, 360)","(744, 56, 180, 360)"
Count,126 Tasks,31 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 305.95 GiB 10.06 GiB Shape (22632, 56, 180, 360) (744, 56, 180, 360) Count 126 Tasks 31 Chunks Type float32 numpy.ndarray",22632  1  360  180  56,

Unnamed: 0,Array,Chunk
Bytes,305.95 GiB,10.06 GiB
Shape,"(22632, 56, 180, 360)","(744, 56, 180, 360)"
Count,126 Tasks,31 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.46 GiB,183.91 MiB
Shape,"(22632, 180, 360)","(744, 180, 360)"
Count,126 Tasks,31 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.46 GiB 183.91 MiB Shape (22632, 180, 360) (744, 180, 360) Count 126 Tasks 31 Chunks Type float32 numpy.ndarray",360  180  22632,

Unnamed: 0,Array,Chunk
Bytes,5.46 GiB,183.91 MiB
Shape,"(22632, 180, 360)","(744, 180, 360)"
Count,126 Tasks,31 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,311.41 GiB,10.24 GiB
Shape,"(22632, 57, 180, 360)","(744, 57, 180, 360)"
Count,778 Tasks,31 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 311.41 GiB 10.24 GiB Shape (22632, 57, 180, 360) (744, 57, 180, 360) Count 778 Tasks 31 Chunks Type float32 numpy.ndarray",22632  1  360  180  57,

Unnamed: 0,Array,Chunk
Bytes,311.41 GiB,10.24 GiB
Shape,"(22632, 57, 180, 360)","(744, 57, 180, 360)"
Count,778 Tasks,31 Chunks
Type,float32,numpy.ndarray


## GEOS-Chem

In [11]:
# pressure at the level edges, read from the LevelEdgeDiags collection
geoschem_levedge_glob = (
    "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
    "GEOSChem.LevelEdgeDiags.*_0000z.nc4"
)
with xr.open_mfdataset(geoschem_levedge_glob) as ds:
    da_pressure_level = ds["Met_PEDGE"]

# CO2 concentration and AREA, read from the SpeciesConc collection
geoschem_spec_glob = (
    "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
    "GEOSChem.SpeciesConc.*_0000z.nc4"
)
with xr.open_mfdataset(geoschem_spec_glob) as ds:
    ds_geoschem = ds[["SpeciesConc_CO2", "AREA"]]
    ds_geoschem["Met_PEDGE"] = da_pressure_level
    # clip to available mozart date range.
    # A label-based time slice keeps the same timestamps as
    # `where(time <= mozart_datetime_end, drop=True)` without masking (and broadcasting
    # against the time condition) every variable of the dataset.
    ds_geoschem = ds_geoschem.sel(time=slice(None, mozart_datetime_end))
    # regrid to 1x1 degree; periodic=True treats the global source grid as wrapping in
    # longitude so target columns beyond the last source longitude are still interpolated
    ds_geoschem = xe.Regridder(ds_geoschem, target_grid, "bilinear", periodic=True)(ds_geoschem)

# one observation for every hour of every day in study period, in units mole per mole dry, regridded
ds_geoschem


Unnamed: 0,Array,Chunk
Bytes,256.78 GiB,278.83 MiB
Shape,"(22632, 47, 180, 360)","(24, 47, 180, 360)"
Count,7820 Tasks,943 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 256.78 GiB 278.83 MiB Shape (22632, 47, 180, 360) (24, 47, 180, 360) Count 7820 Tasks 943 Chunks Type float32 numpy.ndarray",22632  1  360  180  47,

Unnamed: 0,Array,Chunk
Bytes,256.78 GiB,278.83 MiB
Shape,"(22632, 47, 180, 360)","(24, 47, 180, 360)"
Count,7820 Tasks,943 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.46 GiB,5.93 MiB
Shape,"(22632, 180, 360)","(24, 180, 360)"
Count,8854 Tasks,943 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 5.46 GiB 5.93 MiB Shape (22632, 180, 360) (24, 180, 360) Count 8854 Tasks 943 Chunks Type float32 numpy.ndarray",360  180  22632,

Unnamed: 0,Array,Chunk
Bytes,5.46 GiB,5.93 MiB
Shape,"(22632, 180, 360)","(24, 180, 360)"
Count,8854 Tasks,943 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,262.24 GiB,284.77 MiB
Shape,"(22632, 48, 180, 360)","(24, 48, 180, 360)"
Count,7820 Tasks,943 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 262.24 GiB 284.77 MiB Shape (22632, 48, 180, 360) (24, 48, 180, 360) Count 7820 Tasks 943 Chunks Type float32 numpy.ndarray",22632  1  360  180  48,

Unnamed: 0,Array,Chunk
Bytes,262.24 GiB,284.77 MiB
Shape,"(22632, 48, 180, 360)","(24, 48, 180, 360)"
Count,7820 Tasks,943 Chunks
Type,float32,numpy.ndarray
