In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import shap
import matplotlib.pyplot as plt


In [None]:
from sklearn.datasets import load_breast_cancer

# Pull scikit-learn's bundled breast cancer dataset and separate the
# feature table from the label vector.
data = load_breast_cancer()
y = data.target
X = pd.DataFrame(data=data.data, columns=data.feature_names)

# Quick orientation: column names, class labels, and the first few rows.
print("Feature Names:\n", data.feature_names)
print("\nTarget Classes:", data.target_names)
print("\nSample Data:\n", X.head())


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(f"\nTraining Data Shape: {X_train.shape}")
print(f"Testing Data Shape: {X_test.shape}")


In [None]:
# Build a seeded 100-tree random forest and fit it on the training split.
# `fit` returns the estimator itself, so construction and training chain.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Score the held-out split: overall accuracy plus the per-class report.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nTest Accuracy: {accuracy:.4f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))


In [None]:
# Create a SHAP explainer specialised for tree ensembles
explainer = shap.TreeExplainer(model)

# Compute SHAP values for the test set
shap_values = explainer.shap_values(X_test)

# The container returned for a multi-output model depends on the installed
# SHAP release: older releases give a list holding one
# (n_samples, n_features) array per class, while newer releases give a single
# (n_samples, n_features, n_classes) array. The rest of this notebook selects
# a class with `shap_values[1]`, which on the 3-D array form would pick the
# second *sample* instead. Normalise to the per-class list form so that
# `shap_values[k]` always means "class k" regardless of SHAP version.
if isinstance(shap_values, np.ndarray) and shap_values.ndim == 3:
    shap_values = [
        shap_values[:, :, class_idx]
        for class_idx in range(shap_values.shape[2])
    ]

print("\nSHAP Values Computed Successfully!")


In [None]:
# Bar chart of global feature importance for class index 1.
# `show=False` defers rendering so the title can be attached first.
shap.summary_plot(
    shap_values[1],
    features=X_test,
    plot_type="bar",
    show=False,
)
plt.title("Global Feature Importance")
plt.show()


In [None]:
# Force plot explaining one test-set prediction (class index 1),
# rendered through matplotlib rather than the JavaScript widget.
sample_index = 0
shap.force_plot(
    base_value=explainer.expected_value[1],
    shap_values=shap_values[1][sample_index],
    features=X_test.iloc[sample_index],
    matplotlib=True,
)


In [None]:
# Plot how the SHAP value of one feature (class index 1) varies with
# that feature's value across the test set.
feature_name = "mean radius"  # Example feature
shap.dependence_plot(
    feature_name,
    shap_values=shap_values[1],
    features=X_test,
)


In [None]:
# Default SHAP summary plot for class index 1 over the test set.
shap.summary_plot(
    shap_values[1],
    features=X_test,
)


In [None]:
# Persist the class-index-1 SHAP values to a .npy file in the working directory.
np.save(
    file="shap_values.npy",
    arr=shap_values[1],
)
print("\nSHAP Analysis Saved!")
