In [None]:
# Importing necessary libraries
import shap
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import requests


# Load cervical cancer dataset from the UCI repository.
# Missing entries in this file are written as '?'; declaring that at read time
# makes pandas parse them as NaN and infer numeric dtypes, instead of leaving
# string-typed (object) columns behind.
url_cervical = "https://archive.ics.uci.edu/ml/machine-learning-databases/00383/risk_factors_cervical_cancer.csv"
data = pd.read_csv(url_cervical, na_values='?')
# Display the first few rows of the dataset for a quick overview.
# Printed explicitly: a bare expression that is not the last statement of a
# notebook cell (or that lives in a plain script) shows nothing.
print(data.head())

# Separate features and target BEFORE any imputation so the label is never
# mean-imputed (that would turn a missing label into a fractional "class").
# 'Biopsy' is assumed to be the target variable.
# astype(int) raises if the target contains NaN, which surfaces the problem
# instead of silently training on invented labels.
X = data.drop('Biopsy', axis=1)
y = data['Biopsy'].astype(int)
feature_names = list(X.columns)

# Split first, then fit the preprocessing on the training rows only, so no
# statistics from the held-out rows leak into the imputer or the scaler.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Preprocessing: fill missing values with the training-set column mean, then
# standardise. Both transformers return NumPy arrays.
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()
X_train = scaler.fit_transform(imputer.fit_transform(X_train))
X_test = scaler.transform(imputer.transform(X_test))

# Train a RandomForest model (seeded so the explanations are reproducible)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Apply KernelSHAP.
# KernelExplainer evaluates the model on every background row for every
# sampled coalition, so its cost grows linearly with the background size.
# Summarise the background with a fixed-size random sample rather than passing
# the full training set.
background = shap.sample(X_train, 100, random_state=0)
explainer = shap.KernelExplainer(model.predict_proba, background)
shap_values = explainer.shap_values(X_test)

# Older SHAP releases return a list with one (n_samples, n_features) array per
# class; newer ones return a single (n_samples, n_features, n_classes) array.
# Extract the positive-class (index 1) attributions in either case.
if isinstance(shap_values, list):
    shap_values_positive = shap_values[1]
else:
    shap_values_positive = np.asarray(shap_values)[:, :, 1]

# Visualize the SHAP values for the positive class
shap.summary_plot(shap_values_positive, X_test, feature_names=feature_names)

# Individual prediction explanation (example)
shap.initjs()
index = 0  # Change this index to explore other predictions
shap.force_plot(
    explainer.expected_value[1],
    shap_values_positive[index],
    X_test[index],
    feature_names=feature_names,
)




  0%|          | 0/258 [00:00<?, ?it/s]