# Road Accident Severity Prediction
## Evaluation and Analysis

In [None]:
# ============================================================
# 16. METRICS COMPARISON TABLE
# ============================================================

# One row of scores per model, all measured against y_test.
# all_preds and all_proba are both keyed by model name.
metrics_table = []

# Iterate items() so each model's predictions arrive with its name instead
# of being looked up again inside the loop.
for name, y_pred in all_preds.items():
    # log_loss is the only metric here that is fed from all_proba.
    proba = all_proba[name]

    metrics_table.append({
        "Model": name,
        "Macro_F1": f1_score(y_test, y_pred, average="macro"),
        "Weighted_F1": f1_score(y_test, y_pred, average="weighted"),
        "Micro_F1": f1_score(y_test, y_pred, average="micro"),
        "Accuracy": accuracy_score(y_test, y_pred),
        "LogLoss": log_loss(y_test, proba),
        "Cohen_Kappa": cohen_kappa_score(y_test, y_pred),
    })

# Index by model name; later cells select rows/columns of metrics_df by it.
metrics_df = pd.DataFrame(metrics_table).set_index("Model")
metrics_df


# Model Comparison

In [None]:
# Bar chart of Macro F1 per model. The y-axis is fixed to [0, 1] so the
# bars are read against the full range of the score.
plt.figure()
macro_f1 = metrics_df["Macro_F1"]
sns.barplot(x=macro_f1.index, y=macro_f1)
plt.title("Model Comparison – Macro F1")
plt.ylim(0, 1)
plt.xticks(rotation=15)
plt.show()


# Confusion Matrix

In [None]:
# ============================================================
# 17. CONFUSION MATRIX & CLASSIFICATION REPORT FOR BEST MODEL
# ============================================================

# The "best" model is the metrics_df row with the highest Macro F1.
best_model_name = metrics_df["Macro_F1"].idxmax()
print("Best model by Macro F1:", best_model_name)

y_best = all_preds[best_model_name]

# Pin the class order explicitly. Without `labels`, scikit-learn infers the
# classes from the values present in y_test / y_best, so a class absent from
# both would leave fewer classes than display names and raise.
# Assumes the target is coded 0/1/2, as the display names indicate.
severity_labels = [0, 1, 2]
severity_names = ["Slight (0)", "Serious (1)", "Fatal (2)"]

ConfusionMatrixDisplay.from_predictions(
    y_test, y_best,
    labels=severity_labels,
    display_labels=severity_names,
)
plt.title(f"Confusion Matrix – {best_model_name}")
plt.show()

# Use the same labels and names as the matrix so both outputs read alike.
print(classification_report(
    y_test, y_best,
    labels=severity_labels,
    target_names=severity_names,
))


# SHAP Explainability

In [None]:
# ============================================================
# 18. SHAP EXPLAINABILITY FOR LIGHTGBM (GLOBAL + PER-CLASS)
# ============================================================

# SHAP is computed on a seeded random subset for speed: at most 5,000 rows,
# or the whole of X_test_lgb when it has fewer.
shap_sample_size = min(5000, len(X_test_lgb))
X_shap = X_test_lgb.sample(n=shap_sample_size, random_state=42)

shap.initjs()
explainer = shap.TreeExplainer(lgb_model)
shap_values = explainer(X_shap)

# Global importance (all classes combined)
global_values = shap_values.values
shap.summary_plot(global_values, X_shap, plot_type="bar")


In [None]:
# Full beeswarm: no plot_type is passed, so SHAP picks its default layout.
# NOTE(review): the per-class cell indexes shap_values.values as
# [:, :, cls], i.e. three axes — confirm summary_plot accepts that shape
# here without a class slice.
all_class_values = shap_values.values
shap.summary_plot(all_class_values, X_shap)


In [None]:
# Class-specific importance: 0=Slight, 1=Serious, 2=Fatal
# Each chart is titled with its class; without a title the three bar plots
# cannot be told apart in the cell output.
class_names = {0: "Slight", 1: "Serious", 2: "Fatal"}

for cls, cls_name in class_names.items():
    shap.summary_plot(
        shap_values.values[:, :, cls],  # last axis selects the class
        X_shap,
        plot_type="bar",
        show=False,  # defer rendering so the title can be added first
    )
    plt.title(f"SHAP Feature Importance – {cls_name} ({cls})")
    plt.show()


In [None]:
import geopandas as gpd
from shapely.geometry import Point

# Keep only rows that have both coordinates and a severity class; the
# .copy() detaches the result from `acc`.
acc_geo = acc.dropna(subset=["Latitude", "Longitude", "Severity_Class"]).copy()

# Optional: sample for speed/visual clarity (adjust 50k as you like).
# The size is capped at the rows available because DataFrame.sample raises
# ValueError when asked for more rows than exist (sampling is without
# replacement by default).
acc_geo_sample = acc_geo.sample(min(50_000, len(acc_geo)), random_state=42)

# Build the point geometries in one vectorised call (x = longitude,
# y = latitude) rather than constructing one Point per row in Python.
geometry = gpd.points_from_xy(
    acc_geo_sample["Longitude"],
    acc_geo_sample["Latitude"],
)

# GeoDataFrame with WGS84 (EPSG:4326)
gdf_acc = gpd.GeoDataFrame(
    acc_geo_sample,
    geometry=geometry,
    crs="EPSG:4326",
)
gdf_acc.head()


In [None]:
# Simple plot of accidents without basemap
fig, ax = plt.subplots(figsize=(12, 10))

# Marker colour and legend text for each severity code.
colors = {0: 'green', 1: 'orange', 2: 'red'}
labels = {0: 'Slight', 1: 'Serious', 2: 'Fatal'}

# One layer per severity code, plotted in the order 0, 1, 2 onto the same axes.
for severity, color in colors.items():
    is_severity = gdf_acc['Severity_Class'] == severity
    subset = gdf_acc[is_severity]
    subset.plot(
        ax=ax,
        color=color,
        alpha=0.3,
        markersize=1,
        label=labels[severity],
    )

ax.set_title("UK Road Accidents by Severity", fontsize=16)
ax.set(xlabel="Longitude", ylabel="Latitude")
ax.legend()
plt.tight_layout()
plt.show()

print(f"Plotted {len(gdf_acc):,} accidents")