# Create a 0.05-degree monthly-average gridded dataset (all variables) over the North American region

In [1]:
import sys

sys.path.insert(0, "../source")

import numpy as np
import pandas as pd
import xarray as xr

from data_utils import apply_gridded_average


In [2]:
# Grid step passed as both d_lon and d_lat (0.05 degrees per the notebook title).
res = 0.05
# Region keywords forwarded to apply_gridded_average
# (presumably lon/lat bounds of the North American box -- confirm in data_utils).
extents = dict(lon0_b=-125, lon1_b=-65, lat0_b=22, lat1_b=58)
# Both OCO-2 products are regridded with exactly the same keyword arguments.
grid_kwargs = {**extents, "d_lon": res, "d_lat": res}

# Inputs: the two OCO-2 Lite products and the land-cover file for the region.
ds_sif = xr.open_dataset("../data/production/OCO2_Lite_SIF740.nc4")
ds_xco2 = xr.open_dataset("../data/production/OCO2_Lite_XCO2.nc4")
ds_landcover = xr.open_dataset("../data/production/land_cover_north_america.nc4")

# Average each OCO-2 product onto the 0.05-degree monthly grid.
ds_grid_sif = apply_gridded_average(ds_sif, **grid_kwargs)
ds_grid_xco2 = apply_gridded_average(ds_xco2, **grid_kwargs)


In [3]:
# Merge the gridded SIF and XCO2 datasets into a single dataset (outer join on coords).
ds_merged = xr.merge([ds_grid_sif, ds_grid_xco2], join="outer")

# The CMG (land-cover) grid coordinates are nearly, but not exactly, equal to the gridded
# OCO-2 coordinates, so the OCO-2 lat/lon labels are overwritten with the CMG ones.
# NOTE(review): the replacement is positional -- it relies on both grids having the same
# length and the same ordering along lat and lon; confirm for the land-cover file.
ds_grid = ds_merged.assign_coords(
    lat=ds_landcover["lat"].data,
    lon=ds_landcover["lon"].data,
)
ds_grid


In [4]:
# Write the combined gridded dataset to disk in NETCDF4 format.
netcdf_path = "../data/production/OCO2_005deg_monthly_north_america.nc4"
ds_grid.to_netcdf(netcdf_path, format="NETCDF4")


In [5]:
# Flatten the gridded dataset into a table (for use in establishing basis functions).
df = ds_grid.to_dataframe().reset_index()

# One table per variable: keep lon, lat and the variable, drop rows containing NaN,
# and renumber the rows from zero.
# NOTE(review): any other index level (e.g. time) is not carried into these tables.
df_sif, df_xco2 = (
    df[["lon", "lat", column]].dropna().reset_index(drop=True)
    for column in ("sif", "xco2")
)


In [6]:
# Export each per-variable table to CSV. With pandas' default settings the row index is
# written as the first (unnamed) column.
for frame, csv_path in (
    (df_sif, "../data/production/OCO2_SIF_005deg.csv"),
    (df_xco2, "../data/production/OCO2_XCO2_005deg.csv"),
):
    frame.to_csv(csv_path)
