**Note:** I ran the metric computations here on Colab to make things easier, since there are so many cases (inflow/outflow, bike/taxi, single_prediction/multi_prediction).

Thus, to replicate the computations, the original data (the taxi/bike volume test CSV) and the prediction folders used by the cells below must first be uploaded to the Colab runtime.

In [144]:
#remove folder bike_predictions_200_regions_index_483_20_predictions_same_datapoint
!rm -rf bike_predictions_200_regions_index_483_20_predictions_same_datapoint

In [191]:
import os
import pandas as pd
import numpy as np

# Parameters
# NOTE: "veichle" is a misspelling of "vehicle"; the name is kept because the
# metric functions and the save cells below read this global by this name.
veichle = "bike" # "taxi" or "bike"
volume_type = "outflow"  # "inflow" or "outflow"

# File paths
# Both paths are relative to the current working directory of the runtime.
original_data_file = f'{veichle}_volume_test.csv'
predictions_folder = f'{veichle}_predictions_200_regions_index_483_20_predictions_same_datapoint/'

# Load the original data
# The per-region min/max of this table is what denormalize_data uses.
original_data = pd.read_csv(original_data_file)

# print head
original_data.head()

Unnamed: 0,start_volume,end_volume,region,time_interval
0,2.0,4.0,0,0
1,1.0,3.0,0,1
2,1.0,0.0,0,2
3,1.0,1.0,0,3
4,0.0,0.0,0,4


In [177]:
def denormalize_data(region_id, predictions, ground_truth, original_data):
    """
    Map normalized predictions and ground truth back to raw volume values.

    The inverse min-max transform ``x * (max - min) + min`` is applied, where
    min and max are taken over the rows of ``original_data`` whose 'region'
    equals ``region_id``. The column used is 'start_volume' when the global
    ``volume_type`` is 'inflow' and 'end_volume' otherwise.

    The region's min/max and both de-normalized results are printed.

    Returns:
        Tuple ``(predictions_denorm, ground_truth_denorm)``.
    """
    # NOTE(review): 'inflow' is mapped to 'start_volume' here -- confirm this
    # matches the dataset's column convention.
    if volume_type == 'inflow':
        column_name = 'start_volume'
    else:
        column_name = 'end_volume'

    # Value range of the selected column, restricted to this region
    in_region = original_data['region'] == region_id
    region_values = original_data.loc[in_region, column_name]
    min_val, max_val = region_values.min(), region_values.max()

    print("Min and Max for region", region_id, "are", min_val, max_val)

    # Invert the min-max scaling with the region's own range
    span = max_val - min_val
    predictions_denorm = predictions * span + min_val
    ground_truth_denorm = ground_truth * span + min_val

    print("De-normalized predictions ", predictions_denorm)
    print("De-normalized ground truth ", ground_truth_denorm)

    return predictions_denorm, ground_truth_denorm

In [178]:
def renormalize_taxi_data(data):
    """
    Scale taxi values by the fixed divisor 1283.0 and return the result.
    """
    taxi_divisor = 1283.0
    return data / taxi_divisor

In [179]:
def renormalize_bike_data(data):
    """
    Scale bike values by the fixed divisor 299.0 and return the result.
    """
    bike_divisor = 299.0
    return data / bike_divisor

In [187]:
def compute_mae(predictions, ground_truth):
    """Return the mean absolute difference between predictions and ground truth."""
    absolute_errors = np.abs(predictions - ground_truth)
    return np.mean(absolute_errors)

def compute_rmse(predictions, ground_truth):
    """Return the square root of the mean squared difference between the inputs."""
    residuals = predictions - ground_truth
    mean_squared_error = np.mean(residuals ** 2)
    return np.sqrt(mean_squared_error)

def compute_mape(predictions, ground_truth):
    """
    Return the mean absolute percentage error, in percent.

    Entries whose ground truth equals zero are excluded from the average.
    When every ground-truth entry is zero, ``float('inf')`` is returned.
    """
    # The percentage error is undefined where the ground truth is zero
    keep = ground_truth != 0
    kept_predictions = predictions[keep]
    kept_truth = ground_truth[keep]

    # Nothing left to average over
    if len(kept_truth) == 0:
        return float('inf')

    relative_errors = np.abs((kept_predictions - kept_truth) / kept_truth)
    return np.mean(relative_errors) * 100

def compute_crps(predictions, ground_truth):
    """
    Computes the CRPS of an ensemble of predictions against a single ground truth value.

    The predictions are treated as equally weighted samples of the predictive
    distribution and the standard ensemble estimator is used:

        CRPS = mean_i |x_i - y|  -  0.5 * mean_{i,j} |x_i - x_j|

    This equals the integral of the squared difference between the empirical
    CDF of the samples and the step function at ``y``. With a single
    prediction it reduces to the absolute error.

    Args:
        predictions: 1-D sequence/array of predicted values (the ensemble).
        ground_truth: the single observed value (scalar or one-element array).

    Returns:
        The CRPS as a float (lower is better, 0 for a perfect point mass).

    Raises:
        ValueError: if ``predictions`` is empty.
    """
    samples = np.sort(np.asarray(predictions, dtype=float).ravel())
    n = samples.size
    if n == 0:
        raise ValueError("compute_crps requires at least one prediction")

    # float() rejects multi-element arrays, which would otherwise broadcast silently
    truth = float(ground_truth)

    # First term: mean distance between the samples and the observation
    mean_abs_error = np.mean(np.abs(samples - truth))

    # Second term: for sorted samples,
    #   sum_{i,j} |x_i - x_j| = 2 * sum_i (2i - n + 1) * x_(i)   (i is 0-based)
    # so 0.5 * mean_{i,j} |x_i - x_j| = sum_i (2i - n + 1) * x_(i) / n**2.
    # This avoids building the n-by-n matrix of pairwise differences.
    weights = 2 * np.arange(n) - n + 1
    half_mean_spread = np.sum(weights * samples) / n ** 2

    return float(mean_abs_error - half_mean_spread)

In [181]:
def compute_metrics(predictions_folder, original_data, num_regions=200):
    """
    Computes metrics (MAE, RMSE, MAPE, CRPS) for the single-datapoint experiment.

    For every region id in ``range(num_regions)`` the file
    ``predictions_<volume>_region_<id>.csv`` is read from ``predictions_folder``.
    Its 'Prediction' column is treated as an ensemble of predictions for one
    datapoint whose ground truth is the first 'Ground_Truth' entry. The values
    are de-normalized with the region's own range, regions whose de-normalized
    ground truth is below 10 are skipped, and the rest is re-normalized with
    the dataset-wide divisor.

    MAE, RMSE and MAPE compare the median of each region's ensemble with that
    region's ground truth. CRPS is computed per region by passing the region's
    own ensemble and ground truth to ``compute_crps``, then averaged over the
    retained regions.

    Reads the globals ``veichle`` ("taxi" or "bike") and ``volume_type``.

    Args:
        predictions_folder: folder containing the per-region prediction CSVs.
        original_data: table forwarded to ``denormalize_data``.
        num_regions: number of region ids to scan, starting at 0.

    Returns:
        Dict with keys 'MAE', 'RMSE', 'MAPE', 'CRPS' mapping to floats. All
        four are NaN when no region is retained.

    Raises:
        ValueError: if ``veichle`` is neither "taxi" nor "bike".
    """
    # Resolve the dataset-specific re-normalization once, and fail loudly on an
    # unknown dataset instead of hitting an unbound/stale variable in the loop.
    if veichle == "taxi":
        renormalize = renormalize_taxi_data
    elif veichle == "bike":
        renormalize = renormalize_bike_data
    else:
        raise ValueError(f"Unknown veichle {veichle!r}; expected 'taxi' or 'bike'.")

    volume_name = 'start_volume' if volume_type == 'inflow' else 'end_volume'

    true_vals = []
    pred_vals = []
    crps_vals = []

    for region_id in range(num_regions):
        # Construct file name
        file_name = f"predictions_{volume_name}_region_{region_id}.csv"
        file_path = os.path.join(predictions_folder, file_name)

        # Only the read can raise FileNotFoundError, so only it is guarded
        try:
            region_predictions = pd.read_csv(file_path)
        except FileNotFoundError:
            print(f"File not found for region {region_id}. Skipping.")
            continue

        predictions = region_predictions['Prediction'].values
        ground_truth = region_predictions['Ground_Truth'].values[0]  # Single ground truth value

        # De-normalize the predictions and ground truth
        predictions_denorm, ground_truth_denorm = denormalize_data(
            region_id, predictions, ground_truth, original_data
        )

        # Filter out regions with de-normalized ground truth below 10
        if ground_truth_denorm < 10:
            print(f"Region {region_id} has ground truth below 10. Skipping.")
            print("---------------------")
            continue

        # Re-normalize values
        predictions_renorm = renormalize(predictions_denorm)
        ground_truth_renorm = renormalize(ground_truth_denorm)

        # Point metrics use the ensemble median; CRPS uses the whole ensemble
        true_vals.append(ground_truth_renorm)
        pred_vals.append(np.median(predictions_renorm))
        crps_vals.append(compute_crps(predictions_renorm, ground_truth_renorm))

    # Without any retained region the averages are undefined
    if not true_vals:
        print("No region was retained; metrics are undefined.")
        nan = float('nan')
        return {'MAE': nan, 'RMSE': nan, 'MAPE': nan, 'CRPS': nan}

    # Convert lists to arrays
    true_vals = np.array(true_vals)
    pred_vals = np.array(pred_vals)

    # Plain floats keep the result JSON-serializable whatever the array dtype
    return {
        'MAE': float(compute_mae(pred_vals, true_vals)),
        'RMSE': float(compute_rmse(pred_vals, true_vals)),
        'MAPE': float(compute_mape(pred_vals, true_vals)),
        'CRPS': float(np.mean(crps_vals))
    }


In [142]:
# Compute the metrics across all regions
# NOTE: despite its name, metrics_df is the plain dict returned by
# compute_metrics (keys 'MAE', 'RMSE', 'MAPE', 'CRPS'), not a DataFrame.
metrics_df = compute_metrics(predictions_folder, original_data, num_regions=200)

print(metrics_df)

# Save to a file .json
import json

# saved file naming based on the params
# (the file is written to the current working directory)
with open(f'metrics_{veichle}_{volume_type}.json', 'w') as f:
    json.dump(metrics_df, f)



Min and Max for region 0 are 0.0 104.0
De-normalized predictions  [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.   1.04 0.   0.  ]
De-normalized ground truth  0.9999999999999984
Region 0 has ground truth below 10. Skipping.
---------------------
Min and Max for region 1 are 0.0 101.0
De-normalized predictions  [0.   0.   0.   0.   0.   1.01 0.   0.   0.   0.   0.   0.   0.   0.
 0.   0.   0.   0.   0.   0.  ]
De-normalized ground truth  0.9999999999999999
Region 1 has ground truth below 10. Skipping.
---------------------
Min and Max for region 2 are 0.0 63.0
De-normalized predictions  [1.89 4.41 0.63 0.63 0.63 0.63 0.63 0.63 0.   2.52 4.41 0.   0.   2.52
 0.   0.63 0.   0.   0.63 0.  ]
De-normalized ground truth  0.9999999999999953
Region 2 has ground truth below 10. Skipping.
---------------------
Min and Max for region 3 are 0.0 23.0
De-normalized predictions  [1.84 1.84 0.92 1.84 0.   1.84 1.84 3.91 1.84 1.84 1.84 1.84 1.84 1.84
 1.84 1.84 0.   1

In [182]:
# Multiple Values Prediction
def compute_metrics_across_runs(original_data, num_regions=200, num_runs=5):
    """
    Computes metrics (MAE, RMSE, MAPE, CRPS) from predictions averaged across runs.

    For every region id in ``range(num_regions)`` and every run id in
    ``1..num_runs`` the file
    ``<veichle>_predictions_200_regions_20_datapoints_run_<run>/predictions_<volume>_region_<id>.csv``
    is read. The 'Prediction' columns of the runs that were found are averaged
    element-wise; the 'Ground_Truth' column of the first run found is used as
    the region's ground truth. Values are de-normalized with the region's own
    range, regions whose mean de-normalized ground truth is below 10 are
    skipped, and the rest is re-normalized with the dataset-wide divisor.

    Reads the globals ``veichle`` ("taxi" or "bike") and ``volume_type``.

    Args:
        original_data: table forwarded to ``denormalize_data``.
        num_regions: number of region ids to scan, starting at 0.
        num_runs: number of runs; run ids start at 1.

    Returns:
        Dict with keys 'MAE', 'RMSE', 'MAPE', 'CRPS' mapping to floats. All
        four are NaN when no region is retained.

    Raises:
        ValueError: if ``veichle`` is neither "taxi" nor "bike".
    """
    # Resolve the dataset-specific re-normalization once, and fail loudly on an
    # unknown dataset instead of hitting an unbound/stale variable in the loop.
    if veichle == "taxi":
        renormalize = renormalize_taxi_data
    elif veichle == "bike":
        renormalize = renormalize_bike_data
    else:
        raise ValueError(f"Unknown veichle {veichle!r}; expected 'taxi' or 'bike'.")

    volume_name = 'start_volume' if volume_type == 'inflow' else 'end_volume'

    true_vals = []
    pred_vals = []

    # For each region
    for region_id in range(num_regions):
        file_name = f"predictions_{volume_name}_region_{region_id}.csv"

        # Predictions collected from every run that has a file for this region
        all_run_predictions = []
        region_ground_truth = None

        # For each run
        for run_id in range(1, num_runs + 1):
            # Example: taxi_predictions_200_regions_20_datapoints_run_1/predictions_start_volume_region_0
            predictions_folder = f"{veichle}_predictions_200_regions_20_datapoints_run_{run_id}"
            file_path = os.path.join(predictions_folder, file_name)

            # Only the read can raise FileNotFoundError, so only it is guarded
            try:
                region_predictions = pd.read_csv(file_path)
            except FileNotFoundError:
                print(f"File not found for run {run_id}, region {region_id}. Skipping.")
                continue

            # Store ground truth once (it's constant across runs)
            if region_ground_truth is None:
                region_ground_truth = region_predictions['Ground_Truth'].values

            # Collect predictions from this run
            all_run_predictions.append(region_predictions['Prediction'].values)

        # Without any run there is nothing to average for this region
        if not all_run_predictions:
            print(f"No prediction files found for region {region_id}. Skipping.")
            continue

        # Compute the mean predictions across runs
        mean_predictions = np.mean(all_run_predictions, axis=0)

        # De-normalize the mean predictions and this region's ground truth.
        # The stored per-region ground truth is passed explicitly, so a missing
        # run file can never leak a value loaded for another region.
        predictions_denorm, ground_truth_denorm = denormalize_data(
            region_id, mean_predictions, region_ground_truth, original_data
        )

        # Filter out regions with the mean of de-normalized ground truth below 10
        if np.mean(ground_truth_denorm) < 10:
            print(f"Region {region_id} has the mean ground truth below 10. Skipping.")
            print("---------------------")
            continue

        # Re-normalize values
        predictions_renorm = renormalize(predictions_denorm)
        ground_truth_renorm = renormalize(ground_truth_denorm)

        # Append the re-normalized ground truth and the run-averaged predictions
        true_vals.extend(ground_truth_renorm)
        pred_vals.extend(predictions_renorm)

    # Without any retained region the averages are undefined
    if not true_vals:
        print("No region was retained; metrics are undefined.")
        nan = float('nan')
        return {'MAE': nan, 'RMSE': nan, 'MAPE': nan, 'CRPS': nan}

    # Convert lists to arrays
    true_vals = np.array(true_vals)
    pred_vals = np.array(pred_vals)

    # Compute metrics
    mae = compute_mae(pred_vals, true_vals)
    rmse = compute_rmse(pred_vals, true_vals)
    mape = compute_mape(pred_vals, true_vals)
    # NOTE(review): every ground-truth value is scored against the pooled
    # run-averaged predictions of all regions and datapoints, not against an
    # ensemble for that datapoint (e.g. its per-run predictions) -- confirm
    # this is the intended CRPS.
    crps = np.mean([compute_crps(pred_vals, true_val) for true_val in true_vals])

    # Plain floats keep the result JSON-serializable whatever the array dtype
    return {
        'MAE': float(mae),
        'RMSE': float(rmse),
        'MAPE': float(mape),
        'CRPS': float(crps)
    }


In [192]:
# Compute the metrics from the predictions of all five runs.
# NOTE: despite its name, metrics_df is the plain dict returned by
# compute_metrics_across_runs (keys 'MAE', 'RMSE', 'MAPE', 'CRPS').
metrics_df = compute_metrics_across_runs(original_data, num_regions=200, num_runs=5)

print(metrics_df)

# Save to a file .json
import json

# saved file naming based on the params
# (the file is written to the current working directory)
with open(f'metrics_multiple_pred_{veichle}_{volume_type}.json', 'w') as f:
    json.dump(metrics_df, f)


Min and Max for region 0 are 0.0 89.0
De-normalized predictions  [ 0.534  0.89   0.89   1.068  0.712  0.178  0.     0.     0.     0.178
  1.068  7.832  1.424  1.068  6.764  9.078  6.586 10.146 11.392 27.056]
De-normalized ground truth  [ 1.  1.  2.  1.  2.  1.  0.  0.  1.  1.  2. 11.  3.  4. 10. 12. 13. 14.
 17. 21.]
Region 0 has the mean ground truth below 10. Skipping.
---------------------
Min and Max for region 1 are 0.0 88.0
De-normalized predictions  [ 0.528  0.704  0.352  0.704  1.056  0.     0.88   0.     0.352  0.88
  0.88   4.4    4.752  4.048  5.984  5.632 11.968 27.456 18.832 19.184]
De-normalized ground truth  [ 2.  2.  1.  2.  2.  0.  2.  1.  1.  4.  2.  5.  6.  6.  5. 11. 13. 25.
 13. 19.]
Region 1 has the mean ground truth below 10. Skipping.
---------------------
Min and Max for region 2 are 0.0 53.0
De-normalized predictions  [0.318 0.    0.106 0.424 0.    2.014 0.    0.    0.106 3.074 0.    1.06
 3.498 3.18  2.968 4.24  3.074 5.724 5.512 7.314]
De-normalized ground t

In [189]:
# Remove the taxi prediction folders of runs 1-5
!rm -rf taxi_predictions_200_regions_20_datapoints_run_1
!rm -rf taxi_predictions_200_regions_20_datapoints_run_2
!rm -rf taxi_predictions_200_regions_20_datapoints_run_3
!rm -rf taxi_predictions_200_regions_20_datapoints_run_4
!rm -rf taxi_predictions_200_regions_20_datapoints_run_5