In [1]:
import numpy as np
import pandas as pd
import xarray as xr


## Collect predictions; format and save coSIF datasets

In [2]:
months = {"202102": "2021-02", "202104": "2021-04", "202107": "2021-07", "202110": "2021-10"}
# Results for these labels are read from the cokriging file; every other label
# is read from the corresponding kriging file.
cokriging_labels = {"202107"}

for label, month in months.items():
    method = "cokriging" if label in cokriging_labels else "kriging"

    # Open inside a context manager so the intermediate file handle is released
    # once the formatted copy has been written.
    with xr.open_dataset(f"../data/intermediate/{method}_results_{label}.nc4") as ds_results:
        ds = ds_results.drop_vars("time")
        # Clear dataset-level attributes through the public API rather than the
        # private `_attrs` slot; `drop_vars` returned a new object, so the opened
        # dataset itself is not modified.
        ds.attrs = {}

        # `to_netcdf` with a path returns None, so the result is deliberately
        # not bound to a name.
        (
            ds[["predictions", "rmspe"]]
            .rename({"predictions": "cosif_prediction", "rmspe": "cosif_rmspe"})
            # scalar coordinate identifying the month of this file
            .assign_coords(month=pd.to_datetime(month))
            .assign_attrs(units="W/m^2/sr/μm")
            .to_netcdf(f"../data/output/coSIF_{label}.nc4", format="NETCDF4")
        )


## Calculate average standard error ratios (coSIF prediction standard error relative to the measurement error standard deviation of the data)

In [5]:
def format_month(ds):
    """Turn the scalar ``month`` coordinate of ``ds`` into a trailing length-1 dimension.

    Used as the ``preprocess`` hook so that the per-month files can be combined
    along ``month``.
    """
    month_stamp = pd.to_datetime(ds["month"].data)
    return ds.expand_dims(month=[month_stamp], axis=-1)


ds_cosif = xr.open_mfdataset(
    "../data/output/coSIF_*.nc4",
    preprocess=format_month,
)
ds_cosif


Unnamed: 0,Array,Chunk
Bytes,26.37 MiB,6.59 MiB
Shape,"(720, 1200, 4)","(720, 1200, 1)"
Count,17 Graph Layers,4 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 26.37 MiB 6.59 MiB Shape (720, 1200, 4) (720, 1200, 1) Count 17 Graph Layers 4 Chunks Type float64 numpy.ndarray",4  1200  720,

Unnamed: 0,Array,Chunk
Bytes,26.37 MiB,6.59 MiB
Shape,"(720, 1200, 4)","(720, 1200, 1)"
Count,17 Graph Layers,4 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,26.37 MiB,6.59 MiB
Shape,"(720, 1200, 4)","(720, 1200, 1)"
Count,17 Graph Layers,4 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 26.37 MiB 6.59 MiB Shape (720, 1200, 4) (720, 1200, 1) Count 17 Graph Layers 4 Chunks Type float64 numpy.ndarray",4  1200  720,

Unnamed: 0,Array,Chunk
Bytes,26.37 MiB,6.59 MiB
Shape,"(720, 1200, 4)","(720, 1200, 1)"
Count,17 Graph Layers,4 Chunks
Type,float64,numpy.ndarray


In [6]:
data_path = "../data/intermediate/OCO2_005deg_months2021_north_america_with_basis.nc4"

# Rename the time axis to `month` and keep only the months present in ds_cosif.
with xr.open_dataset(data_path) as ds:
    ds_monthly = ds.rename(time="month")
    ds_data = ds_monthly.sel(month=ds_cosif.month)

ds_data


In [7]:
# Put the coSIF prediction standard error and the data variance side by side.
ds_errors = xr.merge([ds_cosif["cosif_rmspe"], ds_data["sif_var"]]).squeeze()

df_se = (
    ds_errors.astype(np.float64)
    .to_dataframe()
    .reset_index()
    # keep only rows that have a value for sif_var
    .dropna(subset="sif_var")
    .assign(
        # measurement error standard deviation
        me_sd=lambda frame: np.sqrt(frame["sif_var"]),
        # prediction standard error relative to the measurement error
        se_ratio=lambda frame: frame["cosif_rmspe"] / frame["me_sd"],
    )
)

# Average ratio per month
error_ratio_means = df_se.groupby("month")["se_ratio"].mean()
error_ratio_means


month
2021-02-01    0.287380
2021-04-01    0.267288
2021-07-01    0.253762
2021-10-01    0.253446
Name: se_ratio, dtype: float64