In [None]:
# --- Step 19: Basic Model Interpretability ---
print("\n--- Step 19: Basic Model Interpretability  ---")


def feature_names_available(names):
    """Return True when `names` holds at least one feature name.

    An explicit None/length check is used instead of truthiness because the
    truth value of a multi-element NumPy array (or pandas Index) is ambiguous
    and raises ValueError.

    Parameters
    ----------
    names : sequence or None
        Candidate feature names (list, tuple, ndarray, Index, ...).

    Returns
    -------
    bool
    """
    return names is not None and len(names) > 0


def rank_feature_scores(feature_names, scores, score_col, by_abs=False):
    """Build a per-feature score table sorted from highest to lowest score.

    Parameters
    ----------
    feature_names : sequence of str
        One name per feature; must have the same length as `scores`.
    scores : array-like
        One numeric score per feature (coefficient or importance).
    score_col : str
        Name of the column that stores `scores`.
    by_abs : bool, default False
        When True, an extra ``Absolute_<score_col>`` column is added and the
        table is sorted by it, so large negative scores rank high as well.

    Returns
    -------
    pandas.DataFrame
        Columns ``Feature``, `score_col` and, if `by_abs`, the absolute-value
        column; rows in descending order of the sort column.
    """
    table = pd.DataFrame({'Feature': feature_names, score_col: scores})
    sort_col = score_col
    if by_abs:
        sort_col = f'Absolute_{score_col}'
        table[sort_col] = np.abs(table[score_col])
    return table.sort_values(by=sort_col, ascending=False)


# locals().get(...) yields None when the variable was never defined, so the
# guard covers "missing", "None" and "empty" without raising.
if not feature_names_available(locals().get('processed_feature_names')):
    print("Feature names not available; skipping coefficient and importance tables.")
else:
    print("\nLogistic Regression Coefficients (Top 10 by Absolute Magnitude):")
    # coef_[0] is the first row of the coefficient matrix.
    lr_coefficients = rank_feature_scores(
        processed_feature_names, log_reg_model.coef_[0], 'Coefficient', by_abs=True
    )
    print(lr_coefficients.head(10))

    print("\nRandom Forest Feature Importance (Top 10 - using original RF):")
    rf_feature_importances = rank_feature_scores(
        processed_feature_names, rf_model.feature_importances_, 'Importance'
    )
    print(rf_feature_importances.head(10))

    print("\nXGBoost Feature Importance (Top 10):")
    xgb_feature_importances = rank_feature_scores(
        processed_feature_names, xgb_model.feature_importances_, 'Importance'
    )
    print(xgb_feature_importances.head(10))




# --- Step 20: Model Interpretability with SHAP (for Tuned XGBoost)  ---
print("\n--- Step 20:  Model Interpretability with SHAP (Tuned XGBoost) ---")

# Normalise the feature names to a plain list. Real names held in an ndarray,
# Index or tuple are kept (converted with list()); only a missing, None or
# empty value falls back to generic labels.
try:
    processed_feature_names = list(processed_feature_names)
except (NameError, TypeError):
    # NameError: variable never defined; TypeError: None / not iterable.
    processed_feature_names = []
if not processed_feature_names:
    # Size the fallback from the matrix that is actually wrapped below.
    processed_feature_names = [f'feature_{i}' for i in range(X_test_processed.shape[1])]

# Named columns let SHAP label its plots with feature names.
X_test_processed_df = pd.DataFrame(X_test_processed, columns=processed_feature_names)

try:
    explainer = shap.TreeExplainer(models['Tuned XGBoost'])
    shap_values = explainer.shap_values(X_test_processed_df)

    print("\n SHAP Summary Plot (Global Feature Importance - Bar Plot)...")
    plt.figure(figsize=(10, 6))
    # show=False keeps the figure open so a title can be added before display.
    shap.summary_plot(shap_values, X_test_processed_df, plot_type="bar", show=False)
    plt.title('SHAP Feature Importance for Tuned XGBoost ')
    plt.tight_layout()
    plt.show()
    print("SHAP Summary Bar Plot displayed above.")

    print("\nGenerating SHAP Summary Plot (Beeswarm Plot)...")
    plt.figure(figsize=(10, 6))
    shap.summary_plot(shap_values, X_test_processed_df, show=False)  # default plot type: beeswarm
    plt.title('SHAP Summary Plot (Beeswarm) for Tuned XGBoost ')
    plt.tight_layout()
    plt.show()

    if 'age' in processed_feature_names:
        print("\nGenerating SHAP Dependence Plot for 'age'...")
        # dependence_plot opens its own figure unless an Axes is supplied, so
        # the Axes is passed explicitly; otherwise a blank extra figure would
        # be displayed alongside the real plot.
        fig, ax = plt.subplots(figsize=(8, 6))
        shap.dependence_plot("age", shap_values, X_test_processed_df, ax=ax, show=False)
        ax.set_title('SHAP Dependence Plot for Age ')
        fig.tight_layout()
        plt.show()

except Exception as e:
    # Best-effort section: report the problem and let the notebook continue.
    print(f"Error during SHAP analysis: {e}")





In [None]:


from sklearn.inspection import permutation_importance
from sklearn.calibration import CalibrationDisplay
from sklearn.preprocessing import MinMaxScaler

# --- Step 22: Feature Importances for All Models ---
print("\n--- Step 22: Feature Importances for All Models ---")

# Maps model name -> DataFrame of features ranked by that model's importance.
feature_importance_results = {}

for name, model in models.items():
    score_column = f"Importance_{name}"
    try:
        # Choose the importance source in order of preference:
        # native importances, then |coefficients|, then permutation importance.
        if hasattr(model, "feature_importances_"):
            scores = model.feature_importances_
        elif hasattr(model, "coef_"):
            # First row of the coefficient matrix (e.g. Logistic Regression).
            scores = np.abs(model.coef_[0])
        else:
            # Model exposes neither attribute: measure importance on the test set.
            permutation_scores = permutation_importance(
                model,
                X_test_processed,
                y_test,
                n_repeats=10,
                random_state=42,
                n_jobs=-1,
            )
            scores = permutation_scores.importances_mean

        ranking = pd.DataFrame(
            {"Feature": processed_feature_names, score_column: scores}
        )
        ranking = ranking.sort_values(by=score_column, ascending=False)
        feature_importance_results[name] = ranking

        print(f"\nTop 10 Features for {name}:")
        print(ranking.head(10))

    except Exception as e:
        # A failing model is reported and skipped so the others still run.
        print(f"Skipping feature importance for {name}: {e}")

# --- Step 23: Combined Feature Importance Plot (Grouped Bars) ---
print("\n--- Step 23: Combined Feature Importance Plot ---")

if not feature_importance_results:
    # Every model was skipped in the previous step: there is nothing to merge
    # or plot, and indexing the first entry would raise IndexError.
    print("No feature importances available; skipping combined plot.")
else:
    # Start from the first model's feature column (this fixes the row order),
    # then left-join every model's importance column onto it.
    first_table = next(iter(feature_importance_results.values()))
    merged_importances = first_table[["Feature"]]

    for df_imp in feature_importance_results.values():
        merged_importances = merged_importances.merge(df_imp, on="Feature", how="left")

    # Scale each model's importances to [0, 1] independently so models with
    # different importance units can share one axis.
    scaler = MinMaxScaler()
    for col in merged_importances.columns:
        if col != "Feature":
            # fit_transform returns shape (n, 1); flatten to a 1-D column.
            merged_importances[col] = scaler.fit_transform(merged_importances[[col]]).ravel()

    # Long format (one row per feature/model pair) for seaborn's hue grouping.
    merged_melted = pd.melt(
        merged_importances,
        id_vars="Feature",
        var_name="Model",
        value_name="Importance"
    )

    # NOTE: every feature is plotted here; no top-N selection is applied.
    plt.figure(figsize=(18, 8))
    sns.barplot(data=merged_melted, x="Feature", y="Importance", hue="Model")
    plt.title("Top Normalized Features across Models")
    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.show()

from sklearn.calibration import CalibrationDisplay
import matplotlib.pyplot as plt

print("\n--- Step 24: Calibration Curves ---")

# Palette of 13 distinct colours, one per model; reused cyclically when
# prob_maps holds more entries than the palette.
colors = [
    "#1f77b4" ,"#7f7788", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b",
    "#e377c2", "#7f7f7f", "#bcbd22", "#17becf", "#aec7e8", "#fcbb70"
]

# One shared Axes so all calibration curves are overlaid on the same plot.
fig, ax = plt.subplots(figsize=(10, 8))

for i, (name, y_prob) in enumerate(prob_maps.items()):
    color = colors[i % len(colors)]  # wrap around when models outnumber colours
    try:
        CalibrationDisplay.from_predictions(
            y_test, y_prob, n_bins=10, strategy='uniform',
            name=name, ax=ax, color=color,
            # The reference diagonal is drawn once below; disabling sklearn's
            # own line avoids a second diagonal and a duplicate legend entry.
            ref_line=False,
        )
    except Exception as e:
        # A model whose probabilities cannot be plotted is reported and skipped.
        print(f"Skipping calibration for {name}: {e}")

ax.plot([0, 1], [0, 1], "k--", label="Perfectly Calibrated")
ax.set_title("Calibration Curves for All Models")
ax.set_xlabel("Mean Predicted Probability")
ax.set_ylabel("Fraction of Positives")
ax.legend(loc="best")
ax.grid(True)
plt.show()


