# Calibration Playground

### Prepare Workspace

In [4]:
# Import system libraries
import os
import sys
import warnings
warnings.filterwarnings("ignore")

# Import data manipulation libraries
import pandas as pd
import numpy as np
from consts import ORDERED_SITE_IDS
# Import statistics libraries
from sklearn.isotonic import IsotonicRegression

# Anchor for all data paths: the kernel's current working directory.
# The cells below read files via os.path.join(path, '..', 'models/...'), so the
# notebook must be run from a folder that sits one level below the directory
# containing `models/` (presumably the notebook's own folder — confirm).
path = os.getcwd()

### Perform Isotonic Regression

In [13]:
# Load the validation-set quantile predictions and the matching ground truth.
final_val = pd.read_csv(os.path.join(path, '..', 'models/calibration_data/final_val.csv'))
val_gt_path = os.path.join(path, '..', 'models/calibration_data/val_gt.csv')
val_gt = pd.read_csv(val_gt_path)
print(val_gt.shape)
print(final_val.shape)

# Order the ground truth by forecast year, then by site within each year.
# A single two-key sort replaces the former groupby(...).apply(sort_values),
# which relied on apply() operating on the grouping column (deprecated in
# recent pandas releases).
val_gt = val_gt.sort_values(['forecast_year', 'site_id']).reset_index(drop=True)

# Persist the sorted ground truth back to the same file. index=False keeps the
# CSV schema stable: without it every re-run of this cell would add another
# unnamed index column to the file it reads at the top.
val_gt.to_csv(val_gt_path, index=False)

# Predictions and ground truth are paired purely by row position below, so the
# two tables must at least have the same number of rows.
# NOTE(review): this does not prove the rows refer to the same site/year —
# confirm that final_val is written in (forecast_year, site_id) order.
if len(final_val) != len(val_gt):
    raise ValueError(
        f"final_val has {len(final_val)} rows but val_gt has {len(val_gt)}; "
        "they must align row-for-row"
    )

# Predictions for the 10th, 50th and 90th percentiles live in separate columns.
quantiles = [0.1, 0.5, 0.9]
predictions_10th = final_val['volume_10']
predictions_50th = final_val['volume_50']
predictions_90th = final_val['volume_90']
ground_truth = val_gt['volume']

# Fit one monotone calibrator per quantile. out_of_bounds='clip' makes
# predict() clamp inputs outside the fitted range instead of returning NaN.
# NOTE(review): isotonic regression is a least-squares fit, so each calibrator
# pulls its quantile towards the conditional mean of the ground truth rather
# than towards the nominal quantile level — confirm this is intended.
iso_reg_10th = IsotonicRegression(out_of_bounds='clip')
iso_reg_50th = IsotonicRegression(out_of_bounds='clip')
iso_reg_90th = IsotonicRegression(out_of_bounds='clip')

iso_reg_10th.fit(predictions_10th, ground_truth)
iso_reg_50th.fit(predictions_50th, ground_truth)
iso_reg_90th.fit(predictions_90th, ground_truth)

# Calibrate each quantile with its own model. These are in-sample predictions
# (the same rows were used for fitting), and because the three calibrators are
# independent nothing enforces 10th <= 50th <= 90th afterwards.
calibrated_predictions_10th = iso_reg_10th.predict(predictions_10th)
calibrated_predictions_50th = iso_reg_50th.predict(predictions_50th)
calibrated_predictions_90th = iso_reg_90th.predict(predictions_90th)

(3620, 3)
(3620, 5)


### Calculate Pinball Loss

In [12]:
def quantile_loss(y_true, y_pred, q):
    """Return the mean pinball (quantile) loss for a single quantile level.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values for quantile ``q``; must broadcast against ``y_true``.
    q : float
        Quantile level the predictions target (e.g. 0.1, 0.5, 0.9).

    Returns
    -------
    float
        Mean over all elements of ``max(q * r, (q - 1) * r)`` where
        ``r = y_true - y_pred``.
    """
    error = y_true - y_pred
    # Under-prediction (error > 0) is weighted by q, over-prediction by (1 - q);
    # the element-wise maximum selects whichever branch applies.
    under_penalty = q * error
    over_penalty = (q - 1) * error
    pinball = np.maximum(under_penalty, over_penalty)
    return np.mean(pinball)

# Score the raw (uncalibrated) predictions: mean pinball loss per quantile,
# then averaged across the three quantile levels.
raw_predictions = (predictions_10th, predictions_50th, predictions_90th)
average_mean_quantile_loss = np.mean(
    [
        quantile_loss(ground_truth, preds, quantiles[idx])
        for idx, preds in enumerate(raw_predictions)
    ]
)
print("Average Mean Quantile Loss:", average_mean_quantile_loss)

# Same metric for the isotonic-calibrated predictions. The variable is
# deliberately reused, so after this cell it holds the calibrated score.
# NOTE(review): the calibrators were fit on these same rows, so this second
# figure is an in-sample score.
calibrated_predictions = (
    calibrated_predictions_10th,
    calibrated_predictions_50th,
    calibrated_predictions_90th,
)
average_mean_quantile_loss = np.mean(
    [
        quantile_loss(ground_truth, preds, quantiles[idx])
        for idx, preds in enumerate(calibrated_predictions)
    ]
)
print("Average Mean Quantile Loss:", average_mean_quantile_loss)

Average Mean Quantile Loss: 520.9317209257468
Average Mean Quantile Loss: 401.0854969208647
