In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Put the parent of the current working directory on the import path so that
# modules living one level above this notebook can be imported. Index 1 keeps
# the existing first entry of sys.path in front.
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

/home/ubuntu/varios/skforecast


In [2]:
# !pip install properscoring
# !pip install CRPS
# !pip install pymc_marketing

## CRPS score from skforecast predictions

En Skforecast hay varias opciones para obtener previsiones probabilísticas; dos de ellas son:

1) Obtener múltiples predicciones para cada paso, donde cada predicción es un posible valor obtenido mediante bootstrapping. Es decir, se dispone de un valor y_true y de un array de predicciones para el mismo timestamp.

2) Obtener los diferentes cuantiles: q05, q10, q20, etc., para cada predicción. Es decir, se dispone de un valor y_true y de un array de cuantiles para ese mismo timestamp.

## CRPS score from an array of predictions

In [None]:
# Custom function
# ==============================================================================
# https://juanitorduz.github.io/electricity_forecast/

import numpy as np

def crps_emprical(
    y_truth: float,
    y_pred: np.ndarray,
    sample_weight = None,
) -> float:
    """
    Compute the Continuous Ranked Probability Score (CRPS) for a set of
    explicit forecast realizations. The CRPS compares the empirical distribution
    of an ensemble forecast to a scalar observation. The smaller the CRPS, the
    better the forecasted distribution.

    The score is evaluated as ``E|X - y| - 0.5 * E|X - X'|`` where ``X`` and
    ``X'`` are independent draws from the ensemble and ``y`` is the observation.

    Parameters
    ----------
    y_truth : float or array-like
        The true value of the random variable. When `y_pred` has more than one
        dimension, `y_truth` must broadcast against ``y_pred.shape[1:]``.
    y_pred : array-like
        The predicted values of the random variable. These are the multiple
        forecasted values for a single observation. The first axis indexes
        the ensemble members; any further axes index observations. Lists and
        other sequences are accepted.
    sample_weight : np.ndarray, optional
        Weights for each observation, forwarded to `np.average` when the
        per-observation scores are averaged.

    Returns
    -------
    float
        The CRPS score (averaged over observations when several are given).

    Raises
    ------
    ValueError
        If `y_pred` contains no forecast samples.
    """
    # Coerce to arrays so plain lists/tuples work; a bare scalar is treated as
    # a one-member ensemble.
    ensemble = np.atleast_1d(np.asarray(y_pred))
    observed = np.asarray(y_truth)

    num_samples = ensemble.shape[0]
    if num_samples == 0:
        raise ValueError("`y_pred` must contain at least one forecast sample.")

    # First term: mean absolute error of the members against the observation.
    absolute_error = np.mean(np.abs(ensemble - observed), axis=0)
    if num_samples == 1:
        # With a single member the spread term vanishes.
        return np.average(absolute_error, weights=sample_weight)

    # Second term: 0.5 * mean pairwise distance. After sorting, the sum over
    # pairs i < j of (x_j - x_i) equals sum_k k * (n - k) * gap_k, where gap_k
    # is the k-th spacing between consecutive order statistics.
    ordered = np.sort(ensemble, axis=0)
    gaps = ordered[1:] - ordered[:-1]
    weight = np.arange(1, num_samples) * np.arange(num_samples - 1, 0, -1)
    # Append singleton axes so the weights broadcast over observation axes.
    weight = weight.reshape(weight.shape + (1,) * (gaps.ndim - 1))
    per_obs_crps = absolute_error - np.sum(gaps * weight, axis=0) / num_samples**2

    return np.average(per_obs_crps, weights=sample_weight)


# One observation and a synthetic 250-member ensemble drawn from a normal
# distribution with mean 5 and standard deviation 5.
# NOTE(review): the draw uses the global NumPy random state without a visible
# seed, so the score shown below will differ between runs.
y_true = 5
y_pred = np.random.normal(loc=5, scale=5, size=250)

crps_emprical(y_truth=y_true, y_pred=y_pred)

1.153451758995442

In [4]:
# properscoring
# ==============================================================================
import numpy as np
import properscoring as ps
# Reference value: score the same observation and ensemble with properscoring.
ps.crps_ensemble(y_true, y_pred)

1.1534517589954416

In [5]:
# CRPS
# ==============================================================================
import CRPS.CRPS as pscore
# Note the argument order: ensemble first, observation second. The first
# element of the value returned by compute() is taken as the CRPS.
pscore(y_pred, y_true).compute()[0]

1.1534517589954425

In [6]:
# pymc_marketing
# ==============================================================================
import numpy as np
from pymc_marketing.metrics import crps
# The ensemble is reshaped into a single column, i.e. shape (n_samples, 1),
# before being passed in -- presumably samples on the first axis and
# observations on the second; confirm against the pymc_marketing docs.
crps(y_true, y_pred.reshape(-1, 1))



1.153451758995442

## Testing equivalence

In [7]:
import pandas as pd

# Seeded generator so the comparison table is reproducible.
rng = np.random.default_rng(123)
n = 500            # number of observations (rows)
n_bootstrap = 50   # ensemble members per observation

# Both columns are sized from `n`, so changing it cannot produce a
# length mismatch. The draw order (y_true first, then one ensemble per row)
# is kept so the generated values are unchanged.
df = pd.DataFrame({
    'y_true': rng.integers(-100, 100, n),
    'y_pred_bootstrapping': [rng.integers(-100, 100, n_bootstrap) for _ in range(n)]
})

# Score every row with each implementation.
df['custom_crps'] = df.apply(lambda x: crps_emprical(x['y_true'], x['y_pred_bootstrapping']), axis=1)
df['proper_crps'] = df.apply(lambda x: ps.crps_ensemble(x['y_true'], x['y_pred_bootstrapping']), axis=1)
df['pymc_crps'] = df.apply(lambda x: crps(x['y_true'], x['y_pred_bootstrapping'].reshape(-1, 1)), axis=1)

# The three implementations must agree up to floating-point tolerance.
assert np.allclose(df['custom_crps'], df['proper_crps']), "custom CRPS differs from properscoring"
assert np.allclose(df['custom_crps'], df['pymc_crps']), "custom CRPS differs from pymc_marketing"

df

Unnamed: 0,y_true,y_pred_bootstrapping,custom_crps,proper_crps,pymc_crps
0,-97,"[8, 1, 85, 51, 10, -3, 79, 65, 38, 8, -79, -29...",64.5376,64.5376,64.5376
1,36,"[91, 94, -24, -46, -79, -75, -89, 53, -15, -16...",27.5420,27.5420,27.5420
2,18,"[-97, -45, -2, 10, 53, 7, -92, -14, -13, -30, ...",19.3036,19.3036,19.3036
3,-90,"[-18, 2, 88, -100, -76, 65, -38, 66, 85, -33, ...",56.1820,56.1820,56.1820
4,81,"[9, -48, -88, -8, -37, 56, -33, -36, -24, -92,...",65.8704,65.8704,65.8704
...,...,...,...,...,...
495,-88,"[27, -75, -46, -19, -82, -47, 69, -31, 10, 89,...",56.8668,56.8668,56.8668
496,-33,"[-72, 78, -44, 51, 51, 4, -25, -94, 25, 83, 62...",21.5112,21.5112,21.5112
497,-45,"[58, -65, 55, 33, 87, 71, 28, 18, -83, -37, 5,...",42.1032,42.1032,42.1032
498,98,"[-80, -16, 89, -78, -51, 95, -17, -23, 63, 20,...",75.9720,75.9720,75.9720


## CRPS score from an array of quantiles

In [8]:
import numpy as np
import properscoring as ps
from scipy.interpolate import interp1d

# Example true value (kept as a length-1 array)
true_value = np.array([5.0])

# Example forecast: 23 predicted quantile values and their corresponding
# levels, from 0.00 to 1.00. The two arrays are aligned element by element.
quantile_levels     = np.array([0.00, 0.025, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95, 0.975, 1.00])
predicted_quantiles = np.array([0.1, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.0, 10.5, 11.0, 11.5])

In [12]:
# crps_quadrature
# ==============================================================================
# Create an empirical CDF function using interpolation

def crps_quadrature_from_quantiles(true_value, predicted_quantiles, quantile_levels):
    """
    Calculate the Continuous Ranked Probability Score (CRPS) for a given true value
    and predicted quantiles using the function crps_quadrature from the properscoring
    library. The forecast CDF is obtained by linear interpolation between the
    predicted quantiles; it is taken as 0 below the smallest quantile and 1 above
    the largest.

    Parameters
    ----------
    true_value : float or np.array
        The true value of the random variable. A scalar or a length-1 array.
        If an array with several elements is given, only the score of the
        first element is returned.
    predicted_quantiles : np.array
        The predicted quantile values.
    quantile_levels : np.array
        The quantile levels corresponding to the predicted quantiles.

    Returns
    -------
    float
        The CRPS score.

    Raises
    ------
    ValueError
        If `predicted_quantiles` and `quantile_levels` differ in length.
    """
    if len(predicted_quantiles) != len(quantile_levels):
        raise ValueError("The number of predicted quantiles and quantile levels must be equal.")

    # Build the interpolator once: the quadrature routine evaluates the CDF
    # repeatedly, so constructing it inside `empirical_cdf` would redo the
    # same work on every evaluation.
    cdf_func = interp1d(
        predicted_quantiles,
        quantile_levels,
        bounds_error=False,
        fill_value=(0.0, 1.0),
    )

    def empirical_cdf(x):
        # Interpolate between quantile levels and quantile values
        return cdf_func(x)

    # Integration bounds: one unit beyond the extreme predicted quantiles
    xmin = np.min(predicted_quantiles) - 1  # lower bound
    xmax = np.max(predicted_quantiles) + 1  # upper bound

    # Compute CRPS (named `score` to avoid shadowing any imported `crps`)
    score = ps.crps_quadrature(true_value, empirical_cdf, xmin, xmax)

    # `atleast_1d` lets a scalar `true_value` (as documented) work as well as
    # the length-1 array form; for array input this is the same `[0]` as before.
    return np.atleast_1d(score)[0]

# Score the example quantile forecast against the example true value.
crps_quadrature_from_quantiles(true_value, predicted_quantiles, quantile_levels)

0.9342500002584768

In [13]:
import numpy as np

def crps_quantile_interpolation(true_value, predicted_quantiles, quantile_levels):
    """
    Calculate the Continuous Ranked Probability Score (CRPS) for a given true value
    and predicted quantiles. The empirical cdf is approximated using linear interpolation
    between the predicted quantiles (0 below the smallest quantile, 1 above the
    largest), and the CRPS integral is evaluated with the trapezoidal rule on a
    uniform grid of 1000 points.

    Parameters
    ----------
    true_value : float or np.array
        The true value of the random variable. A scalar or a length-1 array.
    predicted_quantiles : array-like
        The predicted quantile values.
    quantile_levels : array-like
        The quantile levels corresponding to the predicted quantiles.

    Returns
    -------
    float
        The CRPS score.

    Raises
    ------
    ValueError
        If `predicted_quantiles` and `quantile_levels` differ in length, or
        if they are empty.
    """
    if len(predicted_quantiles) != len(quantile_levels):
        raise ValueError("The number of predicted quantiles and quantile levels must be equal.")
    if len(predicted_quantiles) == 0:
        raise ValueError("At least one predicted quantile is required.")

    # Coerce to arrays so that lists/tuples can be fancy-indexed below.
    predicted_quantiles = np.asarray(predicted_quantiles, dtype=float)
    quantile_levels = np.asarray(quantile_levels, dtype=float)
    observed = np.asarray(true_value, dtype=float)

    # np.interp requires increasing x-coordinates.
    sorted_indices = np.argsort(predicted_quantiles)
    predicted_quantiles = predicted_quantiles[sorted_indices]
    quantile_levels = quantile_levels[sorted_indices]

    # Define the empirical CDF function using interpolation
    def empirical_cdf(x):
        return np.interp(x, predicted_quantiles, quantile_levels, left=0.0, right=1.0)

    # Define the CRPS integrand: squared gap between the forecast CDF and the
    # step function that jumps from 0 to 1 at the observation.
    def crps_integrand(x):
        return (empirical_cdf(x) - (x >= observed)) ** 2

    # Integration bounds: extend slightly beyond the predicted quantiles AND
    # the observation. The integrand equals 1 between an out-of-range
    # observation and the nearest extreme quantile, so a window built from
    # the quantiles alone would truncate that penalty. For an observation
    # inside the quantile range the bounds are the same as quantiles +/- 2.
    margin = 2
    xmin = min(predicted_quantiles[0], observed.min()) - margin
    xmax = max(predicted_quantiles[-1], observed.max()) + margin

    # Create a fine grid of x values for integration
    x_values = np.linspace(xmin, xmax, 1000)

    # Compute the integrand values and integrate using the trapezoidal rule.
    # `np.trapz` is deprecated in NumPy 2 in favour of `np.trapezoid`; use
    # whichever this NumPy version provides.
    integrate = getattr(np, "trapezoid", None)
    if integrate is None:
        integrate = np.trapz
    integrand_values = crps_integrand(x_values)
    score = integrate(integrand_values, x_values)

    return score


# Score the example quantile forecast with the interpolated-CDF / trapezoidal
# approximation and print the result.
crps_score = crps_quantile_interpolation(true_value, predicted_quantiles, quantile_levels)
print("CRPS:", crps_score)


CRPS: 0.934568678091545


In [14]:
import numpy as np

def crps_quantile_no_interpolation(true_value, predicted_quantiles, quantile_levels):
    """
    Calculate CRPS using predicted quantiles, treating the CDF as a step function.

    Between two consecutive predicted quantiles ``q_i`` and ``q_{i+1}`` the
    forecast CDF is held constant at the level of ``q_i``. Below the smallest
    predicted quantile the CDF is taken as 0 and above the largest as 1. The
    score is the exact integral of ``(F(x) - 1{x >= true_value})**2`` for that
    step function.

    Parameters
    ----------
    true_value : float or np.array
        The true value of the random variable. Arrays are scored element-wise.
    predicted_quantiles : array-like
        The predicted quantile values.
    quantile_levels : array-like
        The quantile levels corresponding to the predicted quantiles.

    Returns
    -------
    float
        The CRPS score. When `true_value` is an array, an array of the same
        shape is returned (one score per element).

    Raises
    ------
    ValueError
        If `predicted_quantiles` and `quantile_levels` differ in length, or
        if they are empty.
    """
    # Ensure inputs are numpy arrays and sorted
    predicted_quantiles = np.asarray(predicted_quantiles, dtype=float)
    quantile_levels = np.asarray(quantile_levels, dtype=float)
    if predicted_quantiles.shape != quantile_levels.shape:
        raise ValueError("The number of predicted quantiles and quantile levels must be equal.")
    if predicted_quantiles.size == 0:
        raise ValueError("At least one predicted quantile is required.")

    sorted_indices = np.argsort(predicted_quantiles)
    predicted_quantiles = predicted_quantiles[sorted_indices]
    quantile_levels = quantile_levels[sorted_indices]

    observed = np.asarray(true_value, dtype=float)
    # Trailing axis so each observation broadcasts against every segment.
    observed_col = observed[..., np.newaxis]

    seg_start = predicted_quantiles[:-1]
    seg_end = predicted_quantiles[1:]
    seg_level = quantile_levels[:-1]

    # Split every segment at the observation. To the left of the observation
    # the indicator is 0 (integrand F_i**2); to the right it is 1 (integrand
    # (F_i - 1)**2). Evaluating the indicator only at the segment's left edge
    # would mis-score the segment that contains the observation.
    split = np.clip(observed_col, seg_start, seg_end)
    width_below = split - seg_start
    width_above = seg_end - split
    inside = np.sum(
        seg_level ** 2 * width_below + (seg_level - 1.0) ** 2 * width_above,
        axis=-1,
    )

    # Outside the quantile range the integrand equals 1 between the
    # observation and the nearest extreme quantile, and 0 elsewhere.
    lower_tail = np.maximum(predicted_quantiles[0] - observed, 0.0)
    upper_tail = np.maximum(observed - predicted_quantiles[-1], 0.0)

    return inside + lower_tail + upper_tail

# Example usage: score the example quantile forecast with the step-function
# CDF and print the result.
crps_score = crps_quantile_no_interpolation(true_value, predicted_quantiles, quantile_levels)
print("CRPS:", crps_score)


CRPS: [0.988125]
