In [2]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

/home/ubuntu/varios/skforecast


CRPS is a proper scoring rule used to assess the accuracy of probabilistic forecasts. When working with a prediction interval, the calculation assumes a uniform (or possibly triangular) distribution over the interval.

Steps to calculate CRPS for an interval forecast:

1) Define the interval: If your prediction interval is [L,U], you assume the forecast distribution is uniform over this range.

2) Compute the CRPS: For a uniform distribution, there's an analytical formula for CRPS, given the true value x, and interval bounds L (lower) and U (upper).

For a uniform distribution over an interval \([L, U]\):

CRPS(x, [L, U]) = 
\begin{cases} 
\frac{(U - L)}{3} - \frac{(x - L)^2}{2(U - L)} & \text{if } x < L \\[10pt]
\frac{(U - L)}{3} - \frac{(U - x)^2 + (x - L)^2}{2(U - L)} & \text{if } L \le x \le U \\[10pt]
\frac{(U - L)}{3} - \frac{(U - x)^2}{2(U - L)} & \text{if } x > U 
\end{cases}

In [13]:
import pandas as pd

def crps_uniform(x, L, U):
    if L >= U:
        raise ValueError("Lower bound must be less than upper bound.")
        
    if x < L:
        return ((U - L) / 3) - ((x - L) ** 2) / (2 * (U - L))
    elif x > U:
        return ((U - L) / 3) - ((U - x) ** 2) / (2 * (U - L))
    else:
        return ((U - L) / 3) - (( (U - x) ** 2 + (x - L) ** 2) / (2 * (U - L)))

# Example:
x_true = 3.5  # Observed value
lower_bound = 2.0
upper_bound = 5.0

crps_value = crps_uniform(x_true, lower_bound, upper_bound)
print(f"CRPS: {crps_value}")

CRPS: 0.25


In [14]:
# Example DataFrame
data = {
    'y_true': [3.5, 4.2, 2.0],
    'lower_bound': [2.0, 3.5, 1.0],
    'upper_bound': [5.0, 6.0, 3.0]
}

df = pd.DataFrame(data)

# Apply the CRPS function to each row
df['CRPS'] = df.apply(lambda row: crps_uniform(row['y_true'], row['lower_bound'], row['upper_bound']), axis=1)

# Display the DataFrame with CRPS values
print(df)

   y_true  lower_bound  upper_bound      CRPS
0     3.5          2.0          5.0  0.250000
1     4.2          3.5          6.0  0.087333
2     2.0          1.0          3.0  0.166667


In [6]:
# !pip install properscoring

In [9]:
# CRPS Calculation with properscoring
# ===================================
import numpy as np
import properscoring as ps

# True observed value
x_true = 3.5

# Forecast as a uniform distribution between L and U
lower_bound = 2.0
upper_bound = 5.0

# Sample from a uniform distribution between L and U
# You can adapt the same approach to Gaussian or other predictive distributions by changing the sampling method
samples = np.random.uniform(lower_bound, upper_bound, 500)

# Calculate CRPS using samples
crps = ps.crps_ensemble(np.array([x_true]), samples.reshape(1, -1))
print(f"CRPS: {crps[0]}")

CRPS: 0.24863008201570277


In [10]:
# CRPS Calculation with sktime
# ===================================
from sktime import CRPS

ModuleNotFoundError: No module named 'sktime'

In [None]:
# https://juanitorduz.github.io/electricity_forecast/

def crps(
    truth: Float[Array, " t"],
    pred: Float[Array, "n_samples t"],
    sample_weight: Float[Array, " t"] | None = None,
) -> Float[Array, ""]:
    if pred.shape[1:] != (1,) * (pred.ndim - truth.ndim - 1) + truth.shape:
        raise ValueError(
            f"""Expected pred to have one extra sample dim on left.
            Actual shapes: {pred.shape} versus {truth.shape}"""
        )

    absolute_error = jnp.mean(jnp.abs(pred - truth), axis=0)

    num_samples = pred.shape[0]
    if num_samples == 1:
        return jnp.average(absolute_error, weights=sample_weight)

    pred = jnp.sort(pred, axis=0)
    diff = pred[1:] - pred[:-1]
    weight = jnp.arange(1, num_samples) * jnp.arange(num_samples - 1, 0, -1)
    weight = weight.reshape(weight.shape + (1,) * (diff.ndim - 1))

    per_obs_crps = absolute_error - jnp.sum(diff * weight, axis=0) / num_samples**2
    return jnp.average(per_obs_crps, weights=sample_weight)