In [2]:
import numpy as np
import pandas as pd
import xarray as xr

## Setup

In [26]:
# Month-end timestamps for the study period, with the 2015-05 month end excluded.
# The frequency is given as an explicit MonthEnd offset rather than the "1M" string
# alias: newer pandas releases deprecate that alias in favour of "ME", while the
# offset object is accepted by old and new releases alike.
# The excluded date is passed as a Timestamp so the drop does not rely on
# string-to-datetime coercion of the label.
study_timeperiod = pd.date_range(
    start="2014-09", end="2017-04", freq=pd.offsets.MonthEnd()
).drop([pd.Timestamp("2015-05-31")])
print(f"Start: {study_timeperiod[0]}, End: {study_timeperiod[-1]}")
print("NOTE: 2015-05 removed")

Start: 2014-09-30 00:00:00, End: 2017-03-31 00:00:00
NOTE: 2015-05 removed


## Inputs

In [27]:
# mozart
# Build one MOZART h0 file path per month in study_timeperiod: the file stamped
# "<yyyy>-<mm>-01-03600" inside the folder named "<yyyy><mm>".
file_paths = []
for month in study_timeperiod:
    # Read the year and month from the timestamp's own fields instead of splitting
    # str(month) on "-". This does not depend on the timestamp's string format and
    # avoids assigning to `_`, which IPython uses for the last cell output.
    yyyy, mm = f"{month.year:04d}", f"{month.month:02d}"
    file_paths.append(
        f"../1_transport/intermediates/MOZART/output/BasisFnsUpdated/{yyyy}{mm}/"
        f"BasisFnsUpdated.mz4.h0.{yyyy}-{mm}-01-03600.nc"
    )


In [1]:
# # concatenate CO2_SRF_EMIS_avrg variable from MOZART diagnostics
# def extract_co2_emission_mozart(ds):
#     return ds["CO2_SRF_EMIS_avrg"]

# ds = xr.open_mfdataset(file_paths, preprocess=extract_co2_emission_mozart, decode_times=False)
# ds


# Issues: 
# - In each "yyyymm" folder (under MOZART/output/BasisFnsUpdated), there are 4 h0 files: one for the month indicated by the folder and one for each of the following three months. If I want to produce a continuous version of the CO2_SRF_EMIS_avrg variable, should I be pulling that from the h0 file for the month indicated by the corresponding folder? Also, what do the files for the other three months represent?
# - Folders for 2015-05 through 2016-01 (inclusive) are missing
# - The "yyyymm" folders (under MOZART/output/BasisFnsUpdated) start with 201409 and end with 201703, but the GEOS-Chem equivalents (HEMCO_diagnostics files under GEOS_Chem/runs/run.v12.3.2.base/output) run from 201409 through 201706 (with no interruption). Do we need to account for the difference of three months at the end when comparing these diagnostics? Would that come from the other three files in the MOZART/output/BasisFnsUpdated/201703 directory?

In [6]:
# geos chem
def extract_co2_emission_geoschem(ds):
    """Return the ``"EmisCO2_Total"`` entry of ``ds``.

    Used as the ``preprocess`` callback of ``xr.open_mfdataset`` in this cell,
    so that only this one variable is kept from each opened file.

    Parameters
    ----------
    ds
        Any object that supports ``ds["EmisCO2_Total"]``.

    Returns
    -------
    Whatever ``ds["EmisCO2_Total"]`` evaluates to.
    """
    co2_emission_total = ds["EmisCO2_Total"]
    return co2_emission_total

geos_chem_path_glob = "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/HEMCO_diagnostics.*.nc"

# Open every file matching the glob as a single dataset. The preprocess callback
# reduces each file to its EmisCO2_Total variable before the files are combined.
with xr.open_mfdataset(
    geos_chem_path_glob,
    preprocess=extract_co2_emission_geoschem,
) as ds:
    emis_co2_total = ds["EmisCO2_Total"]
    # compute the sum over the pressure levels (the "lev" dimension)
    # Still to be applied (notes carried over from the original cell):
    #   - temporal interval is hourly, multiply by 3600 (s/hr)
    #   - multiply by X (m^2 / grid cell)
    da = emis_co2_total.sum(dim=["lev"])

# Display the summed array as the cell output.
da

Unnamed: 0,Array,Chunk
Bytes,1.21 GiB,37.19 MiB
Shape,"(24816, 91, 144)","(744, 91, 144)"
Count,238 Tasks,34 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.21 GiB 37.19 MiB Shape (24816, 91, 144) (744, 91, 144) Count 238 Tasks 34 Chunks Type float32 numpy.ndarray",144  91  24816,

Unnamed: 0,Array,Chunk
Bytes,1.21 GiB,37.19 MiB
Shape,"(24816, 91, 144)","(744, 91, 144)"
Count,238 Tasks,34 Chunks
Type,float32,numpy.ndarray


In [7]:
# Inspect the time coordinate of the combined GEOS-Chem dataset.
# NOTE(review): `ds` is the name bound by the `with xr.open_mfdataset(...)` statement
# in the GEOS-Chem cell, so that block has already exited here. Confirm this still
# displays as expected after Restart Kernel -> Run All.
ds["time"]

In [9]:
# Inspect the longitude coordinate of the level-summed emissions array.
da["lon"]