In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import xesmf as xe
import nctoolkit as nc

import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import cartopy.crs as ccrs
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

1 file was created by nctoolkit in prior or current sessions. Consider running deep_clean!
nctoolkit is using Climate Data Operators version 2.0.5


In [2]:
nc.deep_clean()

## Setup

In [3]:
# produce 1-degree grid cell areas
# read in a single GEOS Chem ouput file, regrid AREA variable with nctoolkit, and update areas
ds_area = nc.open_data(
    "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
    "GEOSChem.SpeciesConc.20140901_0000z.nc4"
)
ds_area.subset(variables = "AREA")
ds_area.to_latlon(lon = [-179.5, 179.5], lat = [-89.5, 89.5], res = [1, 1])
ds_area.cell_area(join=False)
target_grid_area = ds_area.to_xarray()["cell_area"]
target_grid_area

In [4]:
# dataset configuration
date_range = pd.date_range(start="2014-09", end="2017-04", freq="1M")
target_grid = xe.util.grid_global(1, 1)
target_grid

In [15]:
def get_mozart_pressure_edges(ds):
    # compute pressure edges in units hPA and reindex to follow GEOS Chem convention (i.e.,
    # first element is surface pressure)
    # equation: https://www2.acom.ucar.edu/gcm/mozart-4
    return (ds["P0"] * ds["hyai"] + ds["PS"] * ds["hybi"]) * 0.01


def compute_pressure_weights(da_pressure_edge):
    surface_pressure = da_pressure_edge.max(dim="ilev")
    weights = (abs(da_pressure_edge.diff("ilev")) / surface_pressure).values
    return xr.DataArray(
        weights,
        dims=["lev", "time", "lat", "lon"],
    )


## Compute pressure weighted average at first timestep for both models and map the difference

In [23]:
with xr.open_dataset(
    "../1_transport/intermediates/MOZART/output/BasisFnsUpdated/201409/"
    "BasisFnsUpdated.mz4.h0.2014-09-01-03600.nc",
    decode_times=False,
) as ds:
    ds["pressure_edge"] = get_mozart_pressure_edges(ds)
    ds["pressure_weights"] = compute_pressure_weights(ds["pressure_edge"])
    ds_mozart = ds[["CO2_VMR_avrg", "pressure_weights"]]
    da_mozart_xco2 = (ds_mozart["CO2_VMR_avrg"] * ds_mozart["pressure_weights"]).sum(
        dim="lev"
    ).to_dataset(name="xco2")
    
    # TODO: determine how to do conservative regridding
    # ds_mozart_xco2 = ds[["CO2_VMR_avrg", "pressure_edge"]]
    da_mozart_xco2 = xe.Regridder(da_mozart_xco2, target_grid, method="conservative")
    # (
    #     ds_mozart_xco2
    # )


# with xr.open_mfdataset(
#     [
#         "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
#         "GEOSChem.LevelEdgeDiags.20140901_0000z.nc4",
#         "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
#         "GEOSChem.SpeciesConc.20140901_0000z.nc4",
#     ]
# ) as ds:
#     ds = ds.isel(time=0)
#     regridder_geoschem = xe.Regridder(
#         ds[["SpeciesConc_CO2", "Met_PEDGE"]], target_grid, method="conservative"
#     )

ds_mozart_xco2


<xarray.Dataset>
Dimensions:  (lat: 96, lon: 144, time: 720)
Coordinates:
  * lat      (lat) float32 -90.0 -88.11 -86.21 -84.32 ... 84.32 86.21 88.11 90.0
  * lon      (lon) float32 0.0 2.5 5.0 7.5 10.0 ... 350.0 352.5 355.0 357.5
  * time     (time) float64 7.358e+05 7.358e+05 ... 7.359e+05 7.359e+05
Data variables:
    xco2     (time, lat, lon) float32 0.0003943 0.0003943 ... 0.0003927


KeyError: "Dataset.cf does not understand the key 'longitude'. Use 'repr(Dataset.cf)' (or 'Dataset.cf' in a Jupyter environment) to see a list of key names that can be interpreted."

## Suface analyses

In [None]:
mozart_paths = []
for month in date_range:
    yyyy, mm, _ = str(month).split("-")
    mozart_paths.append(
        f"../1_transport/intermediates/MOZART/output/BasisFnsUpdated/{yyyy}{mm}/"
        f"BasisFnsUpdated.mz4.h0.{yyyy}-{mm}-01-03600.nc"
    )

## Vertical column analyses (pressure weighted column average)

division by surface pressure

get monthly xco2 datasets in preprocessing

In [7]:
# precompute grid weights
    
with xr.open_dataset(
    "../1_transport/intermediates/MOZART/output/BasisFnsUpdated/201409/"
    "BasisFnsUpdated.mz4.h0.2014-09-01-03600.nc",
    decode_times=False,
) as ds:
    ds["pressure_edge"] = get_mozart_pressure_edges(ds) 
    regridder_mozart = xe.Regridder(
        ds[["CO2_VMR_avrg", "pressure_edge"]], target_grid, "conservative"
    )

with xr.open_mfdataset(
    [
        "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
        "GEOSChem.LevelEdgeDiags.20140901_0000z.nc4",
        "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
        "GEOSChem.SpeciesConc.20140901_0000z.nc4",
    ]
) as ds:
    regridder_geoschem = xe.Regridder(
        ds[["SpeciesConc_CO2", "Met_PEDGE"]], target_grid, "conservative"
    )




In [4]:
# demo subset
mozart_paths =[
    "../1_transport/intermediates/MOZART/output/BasisFnsUpdated/201409/"
    "BasisFnsUpdated.mz4.h0.2014-09-01-03600.nc",
    "../1_transport/intermediates/MOZART/output/BasisFnsUpdated/201410/"
    "BasisFnsUpdated.mz4.h0.2014-10-01-03600.nc"
]

def prep_mozart(ds):
    # setup times correctly
    ds["time"] = pd.to_datetime(ds["date"].values, format="%Y%m%d") + pd.to_timedelta(
        ds["datesec"].values, unit="seconds"
    )
    # compute pressure edges (in units hPA)
    ds["pressure_edge"] = get_mozart_pressure_edges(ds)
    ds_mozart = ds[["CO2_VMR_avrg", "pressure_edge"]]

    # format according to GEOS Chem conventions
    ds_mozart = ds_mozart.reindex(ilev=ds_mozart.ilev[::-1])
    ds_mozart = ds_mozart.transpose("time", "ilev", "lev", "lat", "lon")

    # compute pressure deltas

     # shift longitude coordinate reference
    ds_mozart["lon"] = ds_mozart["lon"] - 180.0
    # regrid to 1x1 degree
    ds_mozart = regridder_mozart(ds_mozart)
    ds_mozart["area"] = target_grid_area
    

with xr.open_mfdataset(mozart_paths, preprocess=prep_mozart, parallel=True, decode_times=False) as ds:
    

# one observation for every hour of every day in study period, in units VMR, regridded
ds_mozart


Unnamed: 0,Array,Chunk
Bytes,305.95 GiB,10.06 GiB
Shape,"(22632, 56, 180, 360)","(744, 56, 180, 360)"
Count,126 Tasks,31 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 305.95 GiB 10.06 GiB Shape (22632, 56, 180, 360) (744, 56, 180, 360) Count 126 Tasks 31 Chunks Type float32 numpy.ndarray",22632  1  360  180  56,

Unnamed: 0,Array,Chunk
Bytes,305.95 GiB,10.06 GiB
Shape,"(22632, 56, 180, 360)","(744, 56, 180, 360)"
Count,126 Tasks,31 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,311.41 GiB,10.24 GiB
Shape,"(22632, 57, 180, 360)","(744, 57, 180, 360)"
Count,778 Tasks,31 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 311.41 GiB 10.24 GiB Shape (22632, 57, 180, 360) (744, 57, 180, 360) Count 778 Tasks 31 Chunks Type float32 numpy.ndarray",22632  1  360  180  57,

Unnamed: 0,Array,Chunk
Bytes,311.41 GiB,10.24 GiB
Shape,"(22632, 57, 180, 360)","(744, 57, 180, 360)"
Count,778 Tasks,31 Chunks
Type,float32,numpy.ndarray


In [5]:
# mozart_datetime_end = ds_mozart.time.values.max()

# # geoschem_level_edge_glob = (
# #     "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
# #     "GEOSChem.LevelEdgeDiags.*_0000z.nc4"
# # )
# geoschem_level_edge_glob = [
#     "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
#     "GEOSChem.LevelEdgeDiags.20140901_0000z.nc4",
#     "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
#     "GEOSChem.LevelEdgeDiags.20140902_0000z.nc4"
# ]
# with xr.open_mfdataset(geoschem_level_edge_glob) as ds:
#     da_pressure_level = ds["Met_PEDGE"]

# geoschem_spec_conc_glob = (
#     "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
#     "GEOSChem.SpeciesConc.*_0000z.nc4"
# )
geoschem_glob = [
    "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
    "GEOSChem.LevelEdgeDiags.20140901_0000z.nc4",
    "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
    "GEOSChem.LevelEdgeDiags.20140902_0000z.nc4",
    "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
    "GEOSChem.SpeciesConc.20140901_0000z.nc4",
    "../1_transport/intermediates/GEOS_Chem/runs/run.v12.3.2.base/output/"
    "GEOSChem.SpeciesConc.20140902_0000z.nc4"
]
# def extract_co2_variable(ds):
#     return ds[["SpeciesConc_CO2"]]

# with xr.open_mfdataset(geoschem_spec_conc_glob, preprocess=extract_co2_variable) as ds_geoschem:
#     ds_geoschem["Met_PEDGE"] = da_pressure_level
#     # clip to available mozart date range
#     ds_geoschem = ds_geoschem.where(ds_geoschem["time"] <= mozart_datetime_end, drop=True)
#     # regrid to 1x1 degree
#     ds_geoschem = xe.Regridder(ds_geoschem, target_grid, "conservative")(ds_geoschem)
#     ds_geoschem["area"] = target_grid_area

ds_geoschem=xr.open_mfdataset(geoschem_glob)

# one observation for every hour of every day in study period, in units mole per mole dry, regridded
ds_geoschem

Unnamed: 0,Array,Chunk
Bytes,256.78 GiB,278.83 MiB
Shape,"(22632, 47, 180, 360)","(24, 47, 180, 360)"
Count,7820 Tasks,943 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 256.78 GiB 278.83 MiB Shape (22632, 47, 180, 360) (24, 47, 180, 360) Count 7820 Tasks 943 Chunks Type float32 numpy.ndarray",22632  1  360  180  47,

Unnamed: 0,Array,Chunk
Bytes,256.78 GiB,278.83 MiB
Shape,"(22632, 47, 180, 360)","(24, 47, 180, 360)"
Count,7820 Tasks,943 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,262.24 GiB,284.77 MiB
Shape,"(22632, 48, 180, 360)","(24, 48, 180, 360)"
Count,7820 Tasks,943 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 262.24 GiB 284.77 MiB Shape (22632, 48, 180, 360) (24, 48, 180, 360) Count 7820 Tasks 943 Chunks Type float32 numpy.ndarray",22632  1  360  180  48,

Unnamed: 0,Array,Chunk
Bytes,262.24 GiB,284.77 MiB
Shape,"(22632, 48, 180, 360)","(24, 48, 180, 360)"
Count,7820 Tasks,943 Chunks
Type,float32,numpy.ndarray
