In [28]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import onnxruntime as rt
import os
from pathlib import Path
import matplotlib.pyplot as plt


In [2]:
# Read the labelled investigation dataset from disk
data = pd.read_csv('data/investigation_train_large_checked.csv')

# Target: the 'checked' column as integers.
# Features: every column except 'Ja', 'Nee' and the target itself, cast to float32.
y = data['checked'].astype(int)
X = data.drop(columns=['Ja', 'Nee', 'checked']).astype(np.float32)

# Hold out 25% of the rows for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)


In [31]:
# Show the first ten raw values of one of the sensitive features
print(data['belemmering_dagen_lichamelijke_problematiek'].head(10))


0     2503
1     9114
2    -6980
3   -13386
4     4229
5    -1562
6     5487
7     -337
8     -138
9     -300
Name: belemmering_dagen_lichamelijke_problematiek, dtype: int64


In [38]:
# Sensitive features whose partitions are compared for accuracy differences.
# Every entry carries:
#   name       - column name in the feature matrix
#   bias_type  - free-text category shown in the report
#   partitions - how to split the rows; a partition is one of
#                  {"label", "value"}            exact match
#                  {"label", "min", "max"}       inclusive range
#                  {"label", "threshold", "op"}  comparison with <, <=, >, >=
SENSITIVE_FEATURES = [
    dict(
        name="persoon_geslacht_vrouw",
        bias_type="gender",
        partitions=[
            dict(label="0", value=0.0),
            dict(label="1", value=1.0),
        ],
    ),
    dict(
        name="pla_hist_pla_categorie_doelstelling_16",
        bias_type="social benefits",
        partitions=[
            dict(label="0", value=0.0),
            dict(label="1", value=1.0),
        ],
    ),
    dict(
        name="ontheffing_dagen_hist_vanwege_uw_medische_omstandigheden",
        bias_type="medical_history",
        partitions=[
            dict(label="<500", threshold=500, op="<"),
            dict(label=">=500", threshold=500, op=">="),
        ],
    ),
    # NOTE(review): the sample printed from this column earlier in the notebook
    # contains negative values; rows below 0 match neither partition here and are
    # therefore left out of the comparison - confirm that this is intended.
    dict(
        name="belemmering_dagen_lichamelijke_problematiek",
        bias_type="medical_history",
        partitions=[
            dict(label="0-1000", min=0, max=1000),
            dict(label=">1000", threshold=1000, op=">"),
        ],
    ),
]

# Bias is reported as significant when the absolute accuracy difference
# between partitions exceeds this value (0.05 = 5 percentage points).
BIAS_THRESHOLD = 0.05


In [39]:
# ============================================================================
# Partition Testing Function
# ============================================================================
def test_model_partitions(model_name, y_pred, y_true, X_test, sensitive_features):
    """
    Test model performance across different partitions defined by sensitive features.
    
    Parameters:
    - model_name: Name of the model being tested
    - y_pred: Model predictions
    - y_true: True labels
    - X_test: Test features
    - sensitive_features: List of dictionaries with "name", "bias_type", and "partitions" keys
    """
    print(f"\n> -- Tests for {model_name} --")

    for feature_dict in sensitive_features:
        feature_name = feature_dict["name"]
        bias_type = feature_dict.get("bias_type", "unknown")
        partitions = feature_dict.get("partitions", [])
        
        print(f"Partition {feature_name} (bias_type: {bias_type}):")
        
        if feature_name not in X_test.columns:
            print(f"  ⚠️  Feature '{feature_name}' not found in test data")
            continue
        
        if not partitions:
            print(f"  ⚠️  No partitions defined for '{feature_name}'")
            continue
        
        accuracies = {}
        feature_values = X_test[feature_name]
        
        for partition in partitions:
            # Handle binary partitions (exact value match)
            if "value" in partition:
                mask = feature_values == partition["value"]
                label = partition["label"]
            # Handle range-based partitions (min to max, inclusive)
            elif "min" in partition and "max" in partition:
                min_val = partition["min"]
                max_val = partition["max"]
                mask = (feature_values >= min_val) & (feature_values <= max_val)
                label = partition["label"]
            # Handle threshold-based partitions
            elif "threshold" in partition and "op" in partition:
                threshold = partition["threshold"]
                op = partition["op"]
                if op == "<":
                    mask = feature_values < threshold
                elif op == ">=":
                    mask = feature_values >= threshold
                elif op == ">":
                    mask = feature_values > threshold
                elif op == "<=":
                    mask = feature_values <= threshold
                else:
                    print(f"  ⚠️  Unknown operator '{op}' for partition '{partition.get('label', 'unknown')}'")
                    continue
                label = partition["label"]
            else:
                print(f"  ⚠️  Invalid partition definition: {partition}")
                continue
            
            if mask.sum() > 0:
                group_pred = y_pred[mask]
                group_true = y_true[mask]
                group_accuracy = accuracy_score(group_true, group_pred)
                accuracies[label] = group_accuracy
                print(f"  {label}: Accuracy = {group_accuracy:.4f} (n={mask.sum()})")
            else:
                print(f"  {label}: No samples (n=0)")
        
        # Calculate and display delta if we have exactly 2 partitions
        if len(accuracies) == 2:
            labels = list(accuracies.keys())
            delta = accuracies[labels[1]] - accuracies[labels[0]]
            print(f"  → Delta ({labels[1]} - {labels[0]}): {delta:+.4f}")
            if abs(delta) > BIAS_THRESHOLD:
                print(f"  ⚠️  SIGNIFICANT BIAS DETECTED (threshold: {BIAS_THRESHOLD})")


In [40]:
# ============================================================================
# Load and Test All Models from /model Directory
# ============================================================================
model_dir = Path("model")

# Find all ONNX model files (sorted so the run order is deterministic)
onnx_models = sorted(model_dir.glob("*.onnx"))

if not onnx_models:
    print("⚠️  No ONNX models found in the 'model' directory.")
    print("   Please ensure models are saved in 'model/*.onnx' format.")
else:
    print(f"Found {len(onnx_models)} model(s) to test:")
    for model_path in onnx_models:
        print(f"  - {model_path.name}")

    print("\n" + "=" * 70)
    print("Testing Models Sequentially")
    print("=" * 70)

    # Every model receives the same float32 matrix, so convert it only once
    X_test_values = X_test.values.astype(np.float32)

    # Names of model files that raised, so the summary can report them
    failed_models = []

    # Test each model sequentially; one failing model must not stop the others
    for model_path in onnx_models:
        model_name = model_path.stem  # Get filename without extension
        try:
            # Load ONNX model
            session = rt.InferenceSession(str(model_path))

            # Use the input name declared by the model instead of assuming 'X'.
            # NOTE(review): assumes the feature tensor is the model's first input.
            input_name = session.get_inputs()[0].name

            # Get predictions; the first output is passed on as the predicted labels
            y_pred = session.run(None, {input_name: X_test_values})

            # Run partition tests
            test_model_partitions(model_name, y_pred[0], y_test, X_test, SENSITIVE_FEATURES)

        except Exception as e:
            failed_models.append(model_path.name)
            print(f"\n❌ Error testing {model_path.name}: {e}")
            print("   Skipping to next model...")

    # Only claim success when no model was skipped
    print("\n" + "=" * 70)
    if failed_models:
        n_ok = len(onnx_models) - len(failed_models)
        print(f"{n_ok} of {len(onnx_models)} model(s) tested successfully.")
        print(f"Failed: {', '.join(failed_models)}")
    else:
        print("All models tested successfully!")
    print("=" * 70)


Found 2 model(s) to test:
  - model_1.onnx
  - model_2.onnx

Testing Models Sequentially

> -- Tests for model_1 --
Partition persoon_geslacht_vrouw (bias_type: gender):
  0: Accuracy = 0.9455 (n=16673)
  1: Accuracy = 0.9465 (n=15827)
  → Delta (1 - 0): +0.0010
Partition pla_hist_pla_categorie_doelstelling_16 (bias_type: social benefits):
  0: Accuracy = 0.9231 (n=14603)
  1: Accuracy = 0.9630 (n=16102)
  → Delta (1 - 0): +0.0400
Partition ontheffing_dagen_hist_vanwege_uw_medische_omstandigheden (bias_type: medical_history):
  <500: Accuracy = 0.9309 (n=3561)
  >=500: Accuracy = 0.9478 (n=28939)
  → Delta (>=500 - <500): +0.0169
Partition belemmering_dagen_lichamelijke_problematiek (bias_type: medical_history):
  0-1000: Accuracy = 0.9472 (n=1931)
  >1000: Accuracy = 0.9511 (n=16732)
  → Delta (>1000 - 0-1000): +0.0039

> -- Tests for model_2 --
Partition persoon_geslacht_vrouw (bias_type: gender):
  0: Accuracy = 0.8776 (n=16673)
  1: Accuracy = 0.8755 (n=15827)
  → Delta (1 - 0): -0