In [1]:
import pandas as pd
import xarray as xr

# Utils
from sheerwater_benchmarking.utils import start_remote, salient_secret

# Salient functions
from salientsdk.skill import _crps_core
import salientsdk as sk

In [2]:
start_remote(remote_name='genevieve', remote_config='xlarge_cluster')

Output()

## Run tests on Salient evaluation period, for a specific variable and lead

In [3]:
start_time = '2015-01-01'
end_time = '2022-12-31'
variable = 'precip'
var = {"tmp2m": "temp", "precip": "precip"}[variable] # salient naming
metric = 'crps'
region = 'africa'
mask = None
lead = 'week3'
timescale = 'sub-seasonal'

## Pull both forecasts and gt directly from the bucket

In [4]:
filename = f'gs://sheerwater-datalake/salient-data/v9/africa/{var}_{timescale}/blend'
fcst_ds = xr.open_zarr(filename)
fcst_ds = fcst_ds['vals'].to_dataset()
fcst_ds = fcst_ds.rename(vals=variable)
fcst_ds = fcst_ds.sel(forecast_date=slice(start_time, end_time))
fcst_ds

Unnamed: 0,Array,Chunk
Bytes,44.19 GiB,41.59 MiB
Shape,"(1088, 5, 300, 316, 23)","(1, 5, 300, 316, 23)"
Dask graph,1088 chunks in 3 graph layers,1088 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 44.19 GiB 41.59 MiB Shape (1088, 5, 300, 316, 23) (1, 5, 300, 316, 23) Dask graph 1088 chunks in 3 graph layers Data type float32 numpy.ndarray",5  1088  23  316  300,

Unnamed: 0,Array,Chunk
Bytes,44.19 GiB,41.59 MiB
Shape,"(1088, 5, 300, 316, 23)","(1, 5, 300, 316, 23)"
Dask graph,1088 chunks in 3 graph layers,1088 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [5]:
filename = f'gs://sheerwater-datalake/salient-data/v9/africa/{var}_{timescale}/truth'
gt_ds = xr.open_zarr(filename)
gt_ds = gt_ds['vals_actual'].to_dataset()
gt_ds = gt_ds.rename(vals_actual=variable)
gt_ds = gt_ds.sel(forecast_date=fcst_ds.forecast_date)
gt_ds

Unnamed: 0,Array,Chunk
Bytes,1.92 GiB,1.81 MiB
Shape,"(1088, 5, 300, 316)","(1, 5, 300, 316)"
Dask graph,1088 chunks in 3 graph layers,1088 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.92 GiB 1.81 MiB Shape (1088, 5, 300, 316) (1, 5, 300, 316) Dask graph 1088 chunks in 3 graph layers Data type float32 numpy.ndarray",1088  1  316  300  5,

Unnamed: 0,Array,Chunk
Bytes,1.92 GiB,1.81 MiB
Shape,"(1088, 5, 300, 316)","(1, 5, 300, 316)"
Dask graph,1088 chunks in 3 graph layers,1088 chunks in 3 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [6]:
point_lon, point_lat = (25.125, 0.125)
met_ds = _crps_core(observations=gt_ds, forecasts=fcst_ds, qnt_dim='quantiles')
met_ds = met_ds.mean('forecast_date')
met = met_ds.sel(lon=point_lon, lat=point_lat)[metric].compute()

In [7]:
print(met.to_pandas())

lead
1    0.947658
2    1.019598
3    1.094369
4    1.100406
5    1.105346
Name: crps, dtype: float64


In [8]:
username, password = salient_secret()
sk.login(username, password)

loc = sk.Location(point_lon, point_lat) # Single point eval (lon, lat)

# The variable that we'll be evaluating.
fld = "vals"
timescale = "sub-seasonal"
ref_model = "clim"  # Works across all timescale values.

skill_summ = pd.read_csv(
    sk.hindcast_summary(
        loc=loc,
        metric="crps",
        variable=var,
        timescale=timescale,
        reference=ref_model,
        split_set="test",
        force=True
    )
)
print(var, timescale, ref_model)
print(skill_summ.drop(columns="Reference Model"))

precip sub-seasonal clim
     Lead  Reference CRPS  Salient CRPS  Salient CRPS Skill Score (%)
0  Week 1            0.03          0.02                           NaN
1  Week 2            0.03          0.03                           NaN
2  Week 3            0.03          0.03                           NaN
3  Week 4            0.03          0.03                           NaN
4  Week 5            0.03          0.03                           NaN


In [9]:
print(var)

precip
