In [1]:
from datetime import datetime, timedelta

import pandas as pd
import xarray as xr

In [2]:
# Month-end timestamps spanning the study period, one entry per month.
# An explicit MonthEnd offset is passed instead of the "1M" alias string:
# pandas >= 2.2 deprecates the "M" alias (FutureWarning) in favour of "ME",
# while "ME" is unknown to older pandas. The offset object produces the same
# index on both old and new versions.
study_timeperiod = pd.date_range(start="2014-09", end="2017-04", freq=pd.offsets.MonthEnd())
print(f"Start: {study_timeperiod[0]}, End: {study_timeperiod[-1]}")

Start: 2014-09-30 00:00:00, End: 2017-03-31 00:00:00


# Construct input datasets

## MOZART

In [3]:
# Build the path of the MOZART output file for each month of the study period.
file_paths = []
for month in study_timeperiod:
    # Read year and month directly from the timestamp instead of splitting its
    # string form on "-": that relied on the exact repr of the timestamp and
    # assigned the throwaway part to `_`, which shadows IPython's last-output
    # variable in a notebook.
    yyyy, mm = month.strftime("%Y"), month.strftime("%m")
    file_paths.append(
        f"../1_transport/intermediates/MOZART/output/BasisFnsUpdated/{yyyy}{mm}/"
        f"BasisFnsUpdated.mz4.h0.{yyyy}-{mm}-01-03600.nc"
    )


In [4]:
def round_nearest_hour(t):
    """Return ``t`` rounded to the nearest whole hour.

    Minutes 0-29 round down and minutes 30-59 round up; seconds and
    microseconds are discarded before rounding. Rounding up may roll over
    into the next day.
    """
    start_of_hour = t.replace(minute=0, second=0, microsecond=0)
    hours_to_add = 1 if t.minute >= 30 else 0
    return start_of_hour + timedelta(hours=hours_to_add)

def from_gregorian(t):
    """Convert a fractional day ordinal into a datetime rounded to the nearest hour.

    The integer part of ``t`` is passed to ``datetime.fromordinal`` and the
    fractional part is added as a fraction of a day; the result is then
    snapped to the nearest hour with ``round_nearest_hour``.
    """
    # int() truncates, which equals the floor for the positive ordinals
    # that datetime.fromordinal accepts
    whole_days = int(t)
    day_fraction = timedelta(days=t % 1)
    return round_nearest_hour(datetime.fromordinal(whole_days) + day_fraction)


def prepare_mozart_file(ds):
    """Preprocess one MOZART file: convert its time axis and pick the CO2 emissions.

    The raw ``time`` values are treated as fractional day ordinals and replaced
    (on ``ds`` itself) by hour-rounded datetimes. Returns the
    ``CO2_SRF_EMIS_avrg`` variable of the dataset.
    """
    hourly_stamps = [from_gregorian(ordinal) for ordinal in ds.time.values]
    ds["time"] = pd.to_datetime(hourly_stamps)
    # only the CO2 surface emissions variable is kept
    return ds["CO2_SRF_EMIS_avrg"]


# Combine the CO2_SRF_EMIS_avrg variable of all monthly MOZART diagnostics files.
# decode_times=False keeps the raw time values so that prepare_mozart_file
# can convert them itself.
with xr.open_mfdataset(
    file_paths,
    preprocess=prepare_mozart_file,
    decode_times=False,
) as ds:
    # for some reason the timestamps in the files are one year late; shift them back
    shifted_time = ds["time"].to_series() - pd.DateOffset(years=1)
    ds["time"] = shifted_time
    da_mozart = ds["CO2_SRF_EMIS_avrg"]

# hourly values for every day of the study period, in units kg/m^2/s
da_mozart

Unnamed: 0,Array,Chunk
Bytes,1.17 GiB,39.23 MiB
Shape,"(22632, 96, 144)","(744, 96, 144)"
Count,93 Tasks,31 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.17 GiB 39.23 MiB Shape (22632, 96, 144) (744, 96, 144) Count 93 Tasks 31 Chunks Type float32 numpy.ndarray",144  96  22632,

Unnamed: 0,Array,Chunk
Bytes,1.17 GiB,39.23 MiB
Shape,"(22632, 96, 144)","(744, 96, 144)"
Count,93 Tasks,31 Chunks
Type,float32,numpy.ndarray


In [5]:
# write the combined MOZART emissions array to a netCDF file
da_mozart.to_netcdf("../data/mozart_inputs.nc")

In [6]:
# the latest timestamp on the MOZART time axis marks the end of the study period
mozart_datetime_end = da_mozart["time"].values.max()

## GEOS-Chem

In [7]:
def extract_co2_emission_geoschem(ds):
    """Return the ``EmisCO2_Total`` variable of ``ds``, discarding everything else."""
    total_emissions = ds["EmisCO2_Total"]
    return total_emissions


# glob pattern matching the HEMCO diagnostics files of the GEOS-Chem base run
geos_chem_path_glob = (
    "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/HEMCO_diagnostics.*.nc"
)
with xr.open_mfdataset(
    geos_chem_path_glob,
    preprocess=extract_co2_emission_geoschem,
) as ds:
    # collapse the vertical dimension by summing over the pressure levels
    column_total = ds["EmisCO2_Total"].sum(dim=["lev"])

    # keep only the time steps up to the end of the available MOZART range
    within_mozart_range = column_total["time"] <= mozart_datetime_end
    da_geoschem = column_total.where(within_mozart_range, drop=True)

# hourly values for every day of the study period, in units kg/m^2/s
da_geoschem


Unnamed: 0,Array,Chunk
Bytes,1.10 GiB,37.19 MiB
Shape,"(22632, 91, 144)","(744, 91, 144)"
Count,363 Tasks,31 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.10 GiB 37.19 MiB Shape (22632, 91, 144) (744, 91, 144) Count 363 Tasks 31 Chunks Type float32 numpy.ndarray",144  91  22632,

Unnamed: 0,Array,Chunk
Bytes,1.10 GiB,37.19 MiB
Shape,"(22632, 91, 144)","(744, 91, 144)"
Count,363 Tasks,31 Chunks
Type,float32,numpy.ndarray


In [8]:
# write the clipped, level-summed GEOS-Chem emissions array to a netCDF file
da_geoschem.to_netcdf("../data/geoschem_inputs.nc")

# Construct output datasets