<!-- ABSTRACT -->

With this script, we apply Monte Carlo (MC) dropout to the trained model and check how well it performs. The result is a plot of the uncertainty of the model's predictions. However, the estimated uncertainty appears to be fairly low.

In [None]:
import os
import sys
import json
import joblib

import numpy as np
from tqdm import tqdm
import geopandas as gpd

import torch

# Add the 'scripts' directory to Python Path
# The directory is resolved as the parent of the current working directory, so
# this only works when the notebook is started from its own folder.
# Presumably this is what lets the project packages imported below
# (`evaluation`, `gnn`, `data_preprocessing`) be found — confirm the layout.
scripts_path=os.path.abspath(os.path.join(os.getcwd(), '..'))
# Guard against appending the same entry again when the cell is re-run.
if scripts_path not in sys.path:
    sys.path.append(scripts_path)

import evaluation.help_functions as hf
import evaluation.plot_functions as pf

from gnn.help_functions import mc_dropout_predict
from gnn.models.point_net_transf_gat import PointNetTransfGAT
from data_preprocessing.help_functions import highway_mapping

In [None]:
# Get the absolute path to the project root
# (two directory levels above the current working directory, so the result
# depends on where the notebook is launched from).
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))

# Paths
# `run_path` is the training-run directory; the checkpoint and the test data
# saved during training are both read from below it.
run_path = os.path.join(project_root, "data", "runs_01_2025", "wannabe_best_6")
# District polygons; in the visible cells they are only referenced from
# commented-out code.
districts = gpd.read_file(os.path.join(project_root, "data", "visualisation", "districts_paris.geojson"))
base_case_path = os.path.join(project_root, "data", "links_and_stats", "pop_1pct_basecase_average_output_links.geojson")
# Relative output directory handed to the plotting helper (the plots in this
# notebook are all called with save_it=False).
result_path = 'results/'


# GNN Parameters
# The three layer structures are comma-separated strings; a later cell parses
# them into lists of ints before building the model.
# Presumably all of these must match the settings the checkpoint was trained
# with — verify against the run's configuration.
point_net_conv_layer_structure_local_mlp="256"
point_net_conv_layer_structure_global_mlp = "512"
gat_conv_layer_structure = "128,256,512"
dropout = 0.3
# NOTE(review): this notebook runs MC dropout further down. If the model only
# applies dropout when `use_dropout` is True, the repeated forward passes would
# be (nearly) identical and the reported uncertainty artificially low —
# confirm against PointNetTransfGAT and mc_dropout_predict.
use_dropout = False
predict_mode_stats = False
in_channels = 5
out_channels = 1

# Base-case road links; used as `original_gdf` when prediction results are
# turned back into a GeoDataFrame.
# NOTE(review): `crs` is not one of read_file's named parameters; presumably it
# is forwarded to the I/O backend — confirm it has the intended effect.
links_base_case = gpd.read_file(base_case_path, crs="EPSG:4326")
data_created_during_training = os.path.join(run_path, 'data_created_during_training')

In [None]:
###########################################
### Load test data from the run itself! ###
###########################################

# NOTE(security): joblib.load and torch.load (with weights_only=False) both
# unpickle arbitrary Python objects. Only point `run_path` at run directories
# you created yourself or otherwise trust.

# Load scalers
# `scaler_x` is used further down to inverse-transform the node features of a
# sample; `scaler_pos` is loaded alongside it.
scaler_x = joblib.load(os.path.join(data_created_during_training, 'test_x_scaler.pkl'))
scaler_pos = joblib.load(os.path.join(data_created_during_training, 'test_pos_scaler.pkl'))

# Load the test dataset created during training.
# The file holds pickled sample objects (later cells index it and read `.x`
# from each item), not a plain tensor/state dict. Since PyTorch 2.6,
# torch.load defaults to weights_only=True, which refuses to unpickle such
# objects, so the full unpickler is requested explicitly.
test_set_dl = torch.load(
    os.path.join(data_created_during_training, 'test_dl.pt'),
    weights_only=False,
)

# Load the DataLoader parameters
with open(os.path.join(data_created_during_training, 'test_loader_params.json'), 'r', encoding='utf-8') as f:
    test_set_dl_loader_params = json.load(f)

# JSON cannot store a callable, so a string-valued `collate_fn` is only a
# textual leftover of the original function. Drop it so that DataLoader falls
# back to its default collate function instead of trying to call a string.
if isinstance(test_set_dl_loader_params.get('collate_fn'), str):
    del test_set_dl_loader_params['collate_fn']

# In the visible cells the loader is only used as a handle on `.dataset`;
# it is never iterated.
test_set_loader = torch.utils.data.DataLoader(test_set_dl, **test_set_dl_loader_params)

In [None]:
def _parse_layer_structure(spec):
    """Return the layer widths described by *spec* as a list of ints.

    *spec* is normally a comma-separated string such as "128,256,512". An
    already-parsed sequence of widths is accepted as well, so re-running this
    cell (after the names below have been rebound to lists) does not fail on
    a second `.split` call.
    """
    if isinstance(spec, str):
        return [int(width) for width in spec.split(',')]
    return [int(width) for width in spec]


point_net_conv_layer_structure_local_mlp = _parse_layer_structure(point_net_conv_layer_structure_local_mlp)
point_net_conv_layer_structure_global_mlp = _parse_layer_structure(point_net_conv_layer_structure_global_mlp)
gat_conv_layer_structure = _parse_layer_structure(gat_conv_layer_structure)

model = PointNetTransfGAT(
    in_channels=in_channels,
    out_channels=out_channels,
    point_net_conv_layer_structure_local_mlp=point_net_conv_layer_structure_local_mlp,
    point_net_conv_layer_structure_global_mlp=point_net_conv_layer_structure_global_mlp,
    gat_conv_layer_structure=gat_conv_layer_structure,
    dropout=dropout,
    use_dropout=use_dropout,
    predict_mode_stats=predict_mode_stats,
)

# Pick the device before reading the checkpoint so its tensors can be mapped
# onto it. Without `map_location`, a checkpoint saved from a GPU run cannot be
# loaded on a CPU-only machine.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the model state dictionary
model_path = os.path.join(run_path, 'trained_model/model.pth')
state_dict = torch.load(model_path, map_location=device)

# strict=False tolerates key mismatches between checkpoint and model. Report
# them instead of ignoring them: parameters that are missing from the
# checkpoint keep their random initialisation, which would silently distort
# every result below.
load_result = model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(
        "Warning: checkpoint and model do not match exactly. "
        f"Missing keys: {load_result.missing_keys}; "
        f"unexpected keys: {load_result.unexpected_keys}"
    )

model = model.to(device)

loss_fct = torch.nn.MSELoss().to(dtype=torch.float32).to(device)

In [None]:
# Evaluate the loaded model on the complete test set and report the summary
# metrics returned by the project's validation helper.
(
    test_loss,
    r_squared,
    actual_vals,
    predictions,
    baseline_loss,
) = hf.validate_model_on_test_set(model, test_set_loader.dataset, loss_fct, device)

# One print call, one metric per line.
print(
    f"Test Loss: {test_loss}",
    f"R-squared: {r_squared}",
    f"Baseline Loss: {baseline_loss}",
    sep="\n",
)

### Next, we will look at single elements of the test set and visualize the performance of the model.


In [None]:
# Index of the test-set sample to inspect.
i = 2

# Passed to both plots as `fixed_norm_max`, so the predicted and the actual
# map are drawn with the same setting.
fixed_norm_max = 50

my_test_data = test_set_loader.dataset[i]
# CPU copy of the sample's node features, needed for the scaler below.
my_test_x = test_set_loader.dataset[i].x.to('cpu')

# Metrics for this single sample (same helper as for the full test set).
(
    test_loss_my_test_data,
    r_squared_my_test_data,
    actual_vals_my_test_data,
    predictions_my_test_data,
    baseline_loss_my_test_data,
) = hf.validate_model_on_test_set(model, my_test_data, loss_fct, device)
print(
    f"Sample {i}",
    f"Test Loss: {test_loss_my_test_data}",
    f"R-squared: {r_squared_my_test_data}",
    f"Baseline Loss: {baseline_loss_my_test_data}",
    sep="\n",
)

# Undo the feature scaling applied during training.
inversed_x = scaler_x.inverse_transform(my_test_x)

# Combine the sample, the predictions and the base-case links into one
# GeoDataFrame for plotting.
gdf_with_og_values = hf.data_to_geodataframe_with_og_values(
    data=my_test_data,
    original_gdf=links_base_case,
    predicted_values=predictions_my_test_data,
    inversed_x=inversed_x,
)
gdf_with_og_values['capacity_reduction_rounded'] = gdf_with_og_values['capacity_reduction'].round(decimals=3)
gdf_with_og_values['highway'] = gdf_with_og_values['highway'].map(highway_mapping)

# Disabled district assignment, kept for reference:
# gdf_with_og_values['district'] = gdf_with_og_values.apply(lambda row: districts[districts.contains(row.geometry)].iloc[0]['c_ar'] if not districts[districts.contains(row.geometry)].empty else 'Unknown', axis=1)
# gdf_with_og_values = gpd.sjoin(gdf_with_og_values, districts, how='left', op='intersects')

# Arguments that are identical for the "predicted" and the "actual" map.
shared_plot_kwargs = dict(
    gdf_input=gdf_with_og_values,
    save_it=False,
    number_to_plot=i,
    zone_to_plot="this zone",
    use_fixed_norm=True,
    fixed_norm_max=fixed_norm_max,
    known_districts=False,
    buffer=0.0005,
    districts_of_interest=None,
    plot_contour_lines=True,
    plot_policy_roads=False,
    result_path=result_path,
    with_legend=False,
)

print(f"\nPredicted:")
pf.plot_combined_output(
    column_to_plot="vol_car_change_predicted",
    is_predicted=True,
    alpha=0,
    **shared_plot_kwargs,
)

print(f"Actual:")
pf.plot_combined_output(
    column_to_plot="vol_car_change_actual",
    is_predicted=False,
    alpha=10,
    **shared_plot_kwargs,
)

In [None]:
# MC DROPOUT on Single Sample

# Index of the test-set sample to analyse.
i = 32
test_data = test_set_loader.dataset[i]
# CPU copy of the sample's node features, needed for the scaler below.
test_x = test_set_loader.dataset[i].x.to('cpu')

# Regular evaluation of this sample. Note that this rebinds the metric names
# that earlier held the full test-set results.
(
    test_loss,
    r_squared,
    actual_vals,
    predictions,
    baseline_loss,
) = hf.validate_model_on_test_set(model, test_data, loss_fct, device)
print(
    f"Test {i}",
    f"Test Loss: {test_loss}",
    f"R-squared: {r_squared}",
    f"Baseline Loss: {baseline_loss}",
    sep="\n",
)

inversed_x = scaler_x.inverse_transform(test_x)

# MC dropout with num_samples=50 (presumably 50 stochastic forward passes —
# confirm in mc_dropout_predict). Only `uncertainties` is used in this cell.
mean_predictions, uncertainties = mc_dropout_predict(model, test_data, num_samples=50, device=device)

# The GeoDataFrame carries the regular `predictions` from the evaluation above,
# not the MC mean; the uncertainty is added as its own column.
gdf_with_og_values = hf.data_to_geodataframe_with_og_values(
    data=test_data,
    original_gdf=links_base_case,
    predicted_values=predictions,
    inversed_x=inversed_x,
    use_all_features=False,
)
gdf_with_og_values['capacity_reduction_rounded'] = gdf_with_og_values['capacity_reduction'].round(decimals=3)
gdf_with_og_values['highway'] = gdf_with_og_values['highway'].map(highway_mapping)
gdf_with_og_values['mc_uncertainty'] = uncertainties

# Map of the uncertainty column for this sample.
pf.plot_combined_output(
    gdf_input=gdf_with_og_values,
    column_to_plot="mc_uncertainty",
    plot_contour_lines=False,
    save_it=False,
    number_to_plot=i,
    zone_to_plot="this zone",
    is_predicted=True,
    use_fixed_norm=False,
    known_districts=False,
    buffer=0.0005,
    districts_of_interest=None,
    cmap='Reds',
)

In [None]:
# MC DROPOUT on entire test set

test_dataset = test_set_loader.dataset
if len(test_dataset) == 0:
    # Fail early with a clear message instead of averaging an empty list
    # (which yields NaN) and hitting undefined names in the plotting cell.
    raise ValueError("The test set is empty; there is nothing to run MC dropout on.")

# One uncertainty result per test sample, in dataset order.
per_sample_uncertainties = []

for i in tqdm(range(len(test_dataset))):
    # Index the dataset once per iteration and reuse the sample.
    test_data = test_dataset[i]

    mean_predictions, uncertainties = mc_dropout_predict(model, test_data, num_samples=50, device=device)
    per_sample_uncertainties.append(uncertainties)

# `i`, `test_data` and `mean_predictions` are left at the values of the last
# sample; the plotting cell that follows reads them together with `test_x`.
# The CPU feature copy is therefore built once here rather than on every
# iteration.
test_x = test_data.x.to('cpu')

# Element-wise mean over all samples. This assumes every sample yields an
# uncertainty array of the same shape with entries in the same order —
# TODO confirm that all test graphs share one network layout.
mean_uncertainties = np.array(per_sample_uncertainties).mean(axis=0)

In [None]:
# The GeoDataFrame is built from whatever sample the preceding loop handled
# last (`test_data`, `test_x`, `mean_predictions`). Only the averaged
# 'mc_uncertainty' column is plotted, which is presumably why the choice of
# sample does not matter here.
inversed_x = scaler_x.inverse_transform(test_x)
gdf_with_og_values = hf.data_to_geodataframe_with_og_values(
    data=test_data,
    original_gdf=links_base_case,
    predicted_values=mean_predictions,
    inversed_x=inversed_x,
    use_all_features=False,
)
gdf_with_og_values['capacity_reduction_rounded'] = gdf_with_og_values['capacity_reduction'].round(decimals=3)
gdf_with_og_values['highway'] = gdf_with_og_values['highway'].map(highway_mapping)
# Uncertainty averaged over the whole test set.
gdf_with_og_values['mc_uncertainty'] = mean_uncertainties

# `i` is still the last loop index, so i+1 is used as the plot number.
uncertainty_plot_kwargs = dict(
    gdf_input=gdf_with_og_values,
    column_to_plot="mc_uncertainty",
    plot_contour_lines=False,
    save_it=False,
    number_to_plot=i+1,
    zone_to_plot="this zone",
    is_predicted=True,
    use_fixed_norm=False,
    known_districts=False,
    buffer=0.0005,
    districts_of_interest=None,
    cmap='Reds',
)
pf.plot_combined_output(**uncertainty_plot_kwargs)