# CKD Detection and Stage Prediction (MLP, Random Forest, XGBoost, SHAP)

In [None]:
# Step 0: Install SHAP library quietly
%pip install -q shap
# Step 1: Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Step 2: Import scikit-learn modules for preprocessing, splitting data, and evaluation

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
# Step 3: Import PyTorch modules
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
# Step 4: Import SHAP for explainable AI
import shap
# Step 5: Seed the random number generators, then set the device
# (GPU if available, otherwise CPU).
# Weight initialisation, dropout and DataLoader shuffling all draw from these
# generators, so seeding them makes the training runs below repeatable.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)


# Load the dataset 

In [None]:
# from google.colab import files
# uploaded = files.upload()
import pandas as pd

# Read the CKD table from the working directory and preview its first rows.
dataset_file = "updated_ckd_dataset_with_stages.csv"   # update file name
df = pd.read_csv(dataset_file)
df.head()



## Loading and Preprocessing the CKD Dataset from a ZIP File

In [None]:
import zipfile
import pandas as pd
import os

# Locations: the archive sits in the project folder and is unpacked into a
# local working directory.
extract_folder = "./extracted_data"
zip_path = "DKD.zip"   # file must be in your project folder

# Make sure the destination exists before unpacking (no error if it already does)
os.makedirs(extract_folder, exist_ok=True)

# Unpack every member of DKD.zip into the destination folder
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_folder)

# The CSV is looked up directly inside the extraction folder
csv_path = os.path.join(extract_folder, "updated_ckd_dataset_with_stages.csv")

# Read it into the working DataFrame
df = pd.read_csv(csv_path)

# Candidate predictor columns shared by the binary and the stage models.
FEATURE_COLS = [
    "serum_creatinine",
    "gfr",
    "bun",
    "serum_calcium",
    "ana",
    "c3_c4",
    "hematuria",
    "oxalate_levels",
    "urine_ph",
    "blood_pressure",
    "months"
]

# Keep only columns that exist, but report the ones that were not found so a
# renamed or missing column does not silently shrink the feature set.
missing_cols = [c for c in FEATURE_COLS if c not in df.columns]
if missing_cols:
    print("Warning: feature columns not found in dataset:", missing_cols)
FEATURE_COLS = [c for c in FEATURE_COLS if c in df.columns]
print("Using features:", FEATURE_COLS)

BINARY_LABEL_COL = "ckd_pred"
STAGE_LABEL_COL = "ckd_stage"

# Clean missing values: median-impute the feature columns only.
# Filling every numeric column would also invent values for a numeric target
# column; rows with a missing label are left as NaN so the dropna() calls in
# the data-preparation cells can remove them.
feature_medians = df[FEATURE_COLS].median(numeric_only=True)
df[FEATURE_COLS] = df[FEATURE_COLS].fillna(feature_medians)

print(df[BINARY_LABEL_COL].value_counts())
print(df[STAGE_LABEL_COL].value_counts())


## Preparing Data for Binary CKD Classification

In [None]:
# Keep rows that have every feature and a binary label. Take an explicit copy
# so that writing the encoded label below modifies df_bin only and does not
# trigger pandas' SettingWithCopyWarning.
df_bin = df.dropna(subset=FEATURE_COLS + [BINARY_LABEL_COL]).copy()

# Map 'CKD' to 1 and every other value (e.g. 'No CKD') to 0 for binary
# classification, using a vectorised comparison instead of a per-row lambda.
df_bin[BINARY_LABEL_COL] = (df_bin[BINARY_LABEL_COL] == 'CKD').astype(int)

X_bin = df_bin[FEATURE_COLS].values
y_bin = df_bin[BINARY_LABEL_COL].astype(int).values

print("Binary data:", X_bin.shape, y_bin.shape)

## Splitting and Scaling Data for Binary CKD Classification

In [None]:
# Hold out 20% of the rows for testing, stratified on the binary label.
Xb_train, Xb_test, yb_train, yb_test = train_test_split(
    X_bin,
    y_bin,
    test_size=0.2,
    stratify=y_bin,
    random_state=42,
)

# Learn the standardisation statistics from the training rows only, then apply
# the same transform to both partitions.
scaler_bin = StandardScaler()
scaler_bin.fit(Xb_train)
Xb_train_sc = scaler_bin.transform(Xb_train)
Xb_test_sc = scaler_bin.transform(Xb_test)


## Creating a PyTorch Dataset for Tabular Data

In [None]:
class TabularDataset(Dataset):
    """Map-style dataset over a feature matrix and its label vector.

    Features are stored as float32 tensors and labels as int64 (``torch.long``).
    """

    def __init__(self, X, y):
        features = torch.tensor(X, dtype=torch.float32)
        labels = torch.tensor(y, dtype=torch.long)
        self.X, self.y = features, labels

    def __len__(self):
        # Number of entries along the first dimension of the feature tensor
        return len(self.X)

    def __getitem__(self, idx):
        sample = (self.X[idx], self.y[idx])
        return sample


## Defining a Multi-Layer Perceptron (MLP) Model in PyTorch

In [None]:
class MLP(nn.Module):
    """Feed-forward classifier.

    Two 64-unit hidden layers, each followed by ReLU and dropout (p=0.2), then
    a linear layer that emits one logit per class.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        hidden = 64
        layers = [
            nn.Linear(input_dim, hidden), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(hidden, hidden), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(hidden, num_classes),
        ]
        # Unpacking into Sequential keeps the layer indices net.0 ... net.6
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        logits = self.net(x)
        return logits


## Training Function for PyTorch MLP Model

In [None]:
def train_model(model, train_loader, val_loader=None, epochs=30, lr=1e-3):
    """Train ``model`` with Adam and cross-entropy loss, logging once per epoch.

    Args:
        model: ``nn.Module`` mapping a feature batch to per-class logits.
        train_loader: iterable of ``(X_batch, y_batch)`` training batches.
        val_loader: optional iterable of ``(X, y)`` batches; when given, the
            accuracy on it is reported after every epoch.
        epochs: number of passes over ``train_loader``.
        lr: learning rate passed to Adam.

    Returns:
        The same ``model`` instance, moved to ``device`` and trained in place.
    """
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(1, epochs + 1):
        model.train()
        total_loss, seen = 0.0, 0

        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            logits = model(X_batch)
            loss = criterion(logits, y_batch)
            loss.backward()
            optimizer.step()

            # The criterion returns a batch mean; weight it by the batch size
            # so the epoch average is per sample.
            total_loss += loss.item() * X_batch.size(0)
            seen += X_batch.size(0)

        # Average over the samples actually processed this epoch
        avg_loss = total_loss / max(seen, 1)
        message = f"Epoch {epoch:03d} | Loss: {avg_loss:.4f}"

        # validation accuracy (explicit None check: truthiness of a loader
        # depends on it having a length)
        if val_loader is not None:
            model.eval()
            correct, total = 0, 0
            with torch.no_grad():
                for Xv, yv in val_loader:
                    Xv = Xv.to(device)
                    yv = yv.to(device)
                    preds = model(Xv).argmax(dim=1)
                    correct += (preds == yv).sum().item()
                    total += yv.size(0)
            if total:
                message += f" | Val Acc: {correct / total:.4f}"

        # Always report progress, with or without a validation loader
        print(message)

    return model


## Training the Binary CKD (DKD Yes/No) Model

In [None]:
# Mini-batch size shared by the training and evaluation loaders
batch_size = 64

# Wrap the scaled arrays; only the training loader reshuffles its batches
train_ds_bin = TabularDataset(X=Xb_train_sc, y=yb_train)
train_loader_bin = DataLoader(train_ds_bin, shuffle=True, batch_size=batch_size)

test_ds_bin = TabularDataset(X=Xb_test_sc, y=yb_test)
test_loader_bin = DataLoader(test_ds_bin, shuffle=False, batch_size=batch_size)

# One input unit per feature column, two output logits
input_dim = Xb_train_sc.shape[1]
model_bin = MLP(input_dim=input_dim, num_classes=2)

# The test loader is also passed as the per-epoch validation loader
print("Training DKD Yes/No model...")
model_bin = train_model(
    model_bin,
    train_loader=train_loader_bin,
    val_loader=test_loader_bin,
    epochs=30,
)


## Evaluating the Binary CKD (DKD Yes/No) Model

In [None]:
# Inference mode: disables dropout
model_bin.eval()
preds, true = [], []

# Gather predicted and reference labels batch by batch, without gradients
with torch.no_grad():
    for Xb, yb in test_loader_bin:
        Xb = Xb.to(device)
        logits = model_bin(Xb)
        p = logits.argmax(dim=1).cpu().numpy()
        true.extend(yb.numpy())
        preds.extend(p)

print("=== DKD Early Detection ===")
print(confusion_matrix(y_true=true, y_pred=preds))
print(classification_report(y_true=true, y_pred=preds, digits=4))


## Preparing Data for CKD Stage Classification

In [None]:
# A row is usable only if it has every feature and a stage label
required_cols = FEATURE_COLS + [STAGE_LABEL_COL]
df_stage = df.dropna(subset=required_cols)

# Feature matrix and integer stage labels as NumPy arrays
X_stage = df_stage[FEATURE_COLS].values
y_stage = df_stage[STAGE_LABEL_COL].astype(int).values

print("Stage data:", X_stage.shape, y_stage.shape)


## Splitting and Scaling Data for CKD Stage Classification

In [None]:
# 80/20 split, stratified on the stage label
Xs_train, Xs_test, ys_train, ys_test = train_test_split(
    X_stage,
    y_stage,
    test_size=0.2,
    stratify=y_stage,
    random_state=42,
)

# Standardise using statistics learned from the training rows only
scaler_stage = StandardScaler().fit(Xs_train)
Xs_train_sc = scaler_stage.transform(Xs_train)
Xs_test_sc = scaler_stage.transform(Xs_test)


## Adjusting Labels for CKD Stage Classification

In [None]:
# Re-index the stage labels to 0..K-1 using the stages seen in training.
# One mapping is applied to both partitions: subtracting each partition's own
# minimum would shift the test labels differently whenever the lowest stage is
# absent from the test split, and a plain offset leaves gaps (and therefore
# out-of-range targets) if the stage values are not consecutive.
stage_classes = np.unique(ys_train)  # sorted distinct training stages

# A stage that never occurs in training has no valid class index
unseen_stages = np.setdiff1d(ys_test, stage_classes)
if unseen_stages.size:
    raise ValueError(
        f"Test split contains stages absent from training: {unseen_stages.tolist()}"
    )

# Position of each label within the sorted class list = its 0-based class id
ys_train0 = np.searchsorted(stage_classes, ys_train)
ys_test0  = np.searchsorted(stage_classes, ys_test)

num_classes = len(stage_classes)
print("Stage classes:", num_classes)


## Training the Multi-Class CKD Stage Prediction Model

In [None]:
# Datasets over the scaled features and the zero-based stage labels
train_ds_stage = TabularDataset(X=Xs_train_sc, y=ys_train0)
test_ds_stage = TabularDataset(X=Xs_test_sc, y=ys_test0)

# Shuffle only the training batches; evaluation order stays fixed
train_loader_stage = DataLoader(train_ds_stage, shuffle=True, batch_size=64)
test_loader_stage = DataLoader(test_ds_stage, shuffle=False, batch_size=64)

# One output logit per stage class
input_dim_stage = Xs_train_sc.shape[1]
model_stage = MLP(input_dim=input_dim_stage, num_classes=num_classes)

print("Training Stage Prediction model...")
model_stage = train_model(
    model_stage,
    train_loader=train_loader_stage,
    val_loader=test_loader_stage,
    epochs=40,
)


## Evaluating the Multi-Class CKD Stage Prediction Model

In [None]:
# Inference mode: disables dropout
model_stage.eval()
s_preds, s_true = [], []

# Gather predicted and reference stage indices without tracking gradients
with torch.no_grad():
    for Xs, ys in test_loader_stage:
        Xs = Xs.to(device)
        stage_logits = model_stage(Xs)
        p = stage_logits.argmax(dim=1).cpu().numpy()
        s_true.extend(ys.numpy())
        s_preds.extend(p)

print("=== DKD Stage Prediction ===")
print(confusion_matrix(y_true=s_true, y_pred=s_preds))
print(classification_report(y_true=s_true, y_pred=s_preds, digits=4))


##  Random Forest – Binary CKD Prediction

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report

# Random Forest with 200 trees and a fixed seed, fitted on the scaled training split
rf_bin = RandomForestClassifier(random_state=42, n_estimators=200)
rf_bin = rf_bin.fit(Xb_train_sc, yb_train)

# Predicted labels for the held-out rows
y_pred_rf_bin = rf_bin.predict(Xb_test_sc)

# Evaluation: confusion matrix and per-class metrics
print("=== Random Forest: Binary CKD Prediction ===")
print(confusion_matrix(y_true=yb_test, y_pred=y_pred_rf_bin))
print(classification_report(y_true=yb_test, y_pred=y_pred_rf_bin, digits=4))


## XGBoost – Binary CKD Prediction

In [None]:
import xgboost as xgb 

# Initialize XGBoost classifier.
# `use_label_encoder` is intentionally not passed: the option is deprecated
# and recent XGBoost releases only warn that the parameter is not used.
# NOTE(review): yb_train is expected to hold 0/1 integer labels here — confirm
# the binary-preparation cell has been run first.
xgb_bin = xgb.XGBClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=4,
    random_state=42,
    eval_metric='logloss'
)

# Train
xgb_bin.fit(Xb_train_sc, yb_train)

# Predict
y_pred_xgb_bin = xgb_bin.predict(Xb_test_sc)

# Evaluation
print("=== XGBoost: Binary CKD Prediction ===")
print(confusion_matrix(yb_test, y_pred_xgb_bin))
print(classification_report(yb_test, y_pred_xgb_bin, digits=4))


## Random Forest – CKD Stage Prediction

In [None]:
# Multi-class Random Forest: 200 trees, fixed seed, trained on the zero-based stage labels
rf_stage = RandomForestClassifier(random_state=42, n_estimators=200)
rf_stage = rf_stage.fit(Xs_train_sc, ys_train0)

# Predicted stage index for every held-out row
y_pred_rf_stage = rf_stage.predict(Xs_test_sc)

# Evaluation: confusion matrix and per-class metrics
print("=== Random Forest: CKD Stage Prediction ===")
print(confusion_matrix(y_true=ys_test0, y_pred=y_pred_rf_stage))
print(classification_report(y_true=ys_test0, y_pred=y_pred_rf_stage, digits=4))


## XGBoost – CKD Stage Prediction

In [None]:
# Initialize XGBoost classifier for multi-class.
# `use_label_encoder` is intentionally not passed: the option is deprecated
# and recent XGBoost releases only warn that the parameter is not used.
# The targets passed below (ys_train0) are the zero-based stage labels.
xgb_stage = xgb.XGBClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=4,
    random_state=42,
    eval_metric='mlogloss'
)

# Train
xgb_stage.fit(Xs_train_sc, ys_train0)

# Predict
y_pred_xgb_stage = xgb_stage.predict(Xs_test_sc)

# Evaluation
print("=== XGBoost: CKD Stage Prediction ===")
print(confusion_matrix(ys_test0, y_pred_xgb_stage))
print(classification_report(ys_test0, y_pred_xgb_stage, digits=4))


## Prepare Data for PyTorch

In [None]:
# Target column names: binary CKD flag and multi-class CKD stage
BINARY_LABEL, STAGE_LABEL = 'ckd_pred', 'ckd_stage'

# Predictor columns, in the order used for the feature matrix
FEATURE_COLS = [
    "serum_creatinine", "gfr", "bun", "serum_calcium",
    "ana", "c3_c4", "hematuria", "oxalate_levels",
    "urine_ph", "blood_pressure", "months",
]

In [None]:
# Both tasks read the same feature columns; only the target differs.
# Binary classification (labels taken exactly as stored in the dataframe)
X_bin, y_bin = df[FEATURE_COLS].values, df[BINARY_LABEL].values

# Stage prediction (labels cast to integers)
X_stage, y_stage = df[FEATURE_COLS].values, df[STAGE_LABEL].astype(int).values


## Train/Test Split & Scaling

In [None]:
# ==========================
# Binary CKD Prediction Split
# ==========================
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Stratified 80/20 split on the binary label
Xb_train, Xb_test, yb_train, yb_test = train_test_split(
    X_bin, y_bin,
    stratify=y_bin, test_size=0.2, random_state=42,
)

# Fit the scaler on the training rows, then reuse it for the test rows
scaler_bin = StandardScaler().fit(Xb_train)
Xb_train_sc = scaler_bin.transform(Xb_train)
Xb_test_sc = scaler_bin.transform(Xb_test)

# ==========================
# CKD Stage Prediction Split
# ==========================
# Same procedure, stratified on the stage label
Xs_train, Xs_test, ys_train, ys_test = train_test_split(
    X_stage, y_stage,
    stratify=y_stage, test_size=0.2, random_state=42,
)

scaler_stage = StandardScaler().fit(Xs_train)
Xs_train_sc = scaler_stage.transform(Xs_train)
Xs_test_sc = scaler_stage.transform(Xs_test)


## PyTorch Dataset & DataLoader

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import Dataset, DataLoader

# -----------------------------
# Binary CKD: Map to 0/1
# -----------------------------
# Encode only while the labels are still raw (non-numeric) values. Re-running
# this cell on already-encoded 0/1 labels would otherwise compare integers
# with 'CKD' and turn every label into 0.
y_bin = np.asarray(y_bin)
if y_bin.dtype.kind not in "biu":
    # 'CKD' -> 1, anything else -> 0
    y_bin = (y_bin == 'CKD').astype(int)

# -----------------------------
# Train/Test Split
# -----------------------------
# Binary CKD
Xb_train, Xb_test, yb_train, yb_test = train_test_split(
    X_bin, y_bin, test_size=0.2, random_state=42, stratify=y_bin
)

# Scaler statistics come from the training rows only
scaler_bin = StandardScaler()
Xb_train_sc = scaler_bin.fit_transform(Xb_train)
Xb_test_sc = scaler_bin.transform(Xb_test)

# Stage prediction (already numeric)
Xs_train, Xs_test, ys_train, ys_test = train_test_split(
    X_stage, y_stage, test_size=0.2, random_state=42, stratify=y_stage
)

scaler_stage = StandardScaler()
Xs_train_sc = scaler_stage.fit_transform(Xs_train)
Xs_test_sc = scaler_stage.transform(Xs_test)

# -----------------------------
# PyTorch Dataset
# -----------------------------
class TabularDataset(Dataset):
    """In-memory dataset pairing float32 feature rows with int64 class labels."""

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)   # feature rows
        self.y = torch.tensor(y, dtype=torch.long)      # class indices

    def __len__(self):
        n_rows = len(self.X)
        return n_rows

    def __getitem__(self, idx):
        features, label = self.X[idx], self.y[idx]
        return features, label

batch_size = 64

# Binary CKD
train_ds_bin = TabularDataset(Xb_train_sc, yb_train)
test_ds_bin = TabularDataset(Xb_test_sc, yb_test)
train_loader_bin = DataLoader(train_ds_bin, batch_size=batch_size, shuffle=True)
test_loader_bin = DataLoader(test_ds_bin, batch_size=batch_size, shuffle=False)

# Stage prediction (0-index)
# Map each stage to its rank among the stages seen in training instead of
# subtracting a hard-coded 1: this yields 0..num_classes-1 whether the stages
# start at 0 or at 1, and leaves no gaps if a stage value is missing.
stage_classes = np.unique(ys_train)  # sorted distinct training stages
unseen_stages = np.setdiff1d(ys_test, stage_classes)
if unseen_stages.size:
    raise ValueError(
        f"Test split contains stages absent from training: {unseen_stages.tolist()}"
    )
num_classes = len(stage_classes)
train_ds_stage = TabularDataset(Xs_train_sc, np.searchsorted(stage_classes, ys_train))
test_ds_stage = TabularDataset(Xs_test_sc, np.searchsorted(stage_classes, ys_test))
train_loader_stage = DataLoader(train_ds_stage, batch_size=batch_size, shuffle=True)
test_loader_stage = DataLoader(test_ds_stage, batch_size=batch_size, shuffle=False)


## Define MLP Model

In [None]:
import torch.nn as nn

class MLP(nn.Module):
    """Feed-forward classifier with hidden widths 128 and 64.

    Each hidden layer is followed by ReLU and dropout (p=0.2); the final linear
    layer produces one logit per class.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        widths = (input_dim, 128, 64)
        blocks = []
        # Hidden stack: Linear -> ReLU -> Dropout for each consecutive width pair
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            blocks += [nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(0.2)]
        # Output layer
        blocks.append(nn.Linear(widths[-1], num_classes))
        self.net = nn.Sequential(*blocks)

    def forward(self, x):
        logits = self.net(x)
        return logits

# --------------------------
# Model Instances
# --------------------------
# The input width is taken from the binary training matrix and reused for the
# stage model.
input_dim = Xb_train_sc.shape[1]
model_bin = MLP(input_dim=input_dim, num_classes=2)              # Binary CKD
model_stage = MLP(input_dim=input_dim, num_classes=num_classes)  # CKD Stage


## Training Function


In [None]:
import torch.optim as optim

def train_model(model, train_loader, val_loader=None, epochs=30, lr=1e-3):
    """Fit ``model`` with Adam on cross-entropy loss and return it.

    The model is moved to ``device``. After each epoch, if ``val_loader`` is
    truthy, the mean training loss and the accuracy on ``val_loader`` are
    printed; otherwise nothing is printed for that epoch.
    """
    model.to(device)
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(1, epochs + 1):
        # ---- training pass ----
        model.train()
        running_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            adam.zero_grad()
            batch_loss = loss_fn(model(X_batch), y_batch)
            batch_loss.backward()
            adam.step()
            # Batch mean times batch size = summed loss over the batch
            running_loss += batch_loss.item() * X_batch.size(0)

        avg_loss = running_loss / len(train_loader.dataset)

        # ---- validation pass (skipped when no loader was supplied) ----
        if not val_loader:
            continue

        model.eval()
        hits, seen = 0, 0
        with torch.no_grad():
            for Xv, yv in val_loader:
                Xv = Xv.to(device)
                yv = yv.to(device)
                predicted = model(Xv).argmax(dim=1)
                hits += (predicted == yv).sum().item()
                seen += yv.size(0)
        acc = hits / seen
        print(f"Epoch {epoch:03d} | Loss: {avg_loss:.4f} | Val Acc: {acc:.4f}")
    return model


## Train Models

In [None]:
# -------------------------------
# Imports
# -------------------------------
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

# -------------------------------
# Load dataset
# -------------------------------
df = pd.read_csv("updated_ckd_dataset_with_stages.csv")

FEATURE_COLS = [
    "serum_creatinine","gfr","bun","serum_calcium",
    "ana","c3_c4","hematuria","oxalate_levels",
    "urine_ph","blood_pressure","months"
]

BINARY_LABEL = "ckd_pred"
STAGE_LABEL  = "ckd_stage"

# -------------------------------
# Handle missing feature values
# -------------------------------
# The CSV is re-read from disk above, so any gaps in the raw features are
# present again. Median-impute the feature columns only: StandardScaler keeps
# NaNs in its output, so they would otherwise reach the models.
df[FEATURE_COLS] = df[FEATURE_COLS].fillna(df[FEATURE_COLS].median(numeric_only=True))

# -------------------------------
# Preprocess labels
# -------------------------------
# Binary CKD: Map CKD->1, everything else (e.g. No CKD)->0, vectorised
df[BINARY_LABEL] = (df[BINARY_LABEL] == 'CKD').astype(int)

# Stage labels: convert to 0-based integers
df[STAGE_LABEL] = df[STAGE_LABEL].astype(int)
df['stage0'] = df[STAGE_LABEL] - df[STAGE_LABEL].min()  # 0-based

# -------------------------------
# Feature & label arrays
# -------------------------------
X_bin = df[FEATURE_COLS].values
y_bin = df[BINARY_LABEL].values

X_stage = df[FEATURE_COLS].values
y_stage = df['stage0'].values

# -------------------------------
# Train-test split
# -------------------------------
# 80/20 splits, each stratified on its own target
Xb_train, Xb_test, yb_train, yb_test = train_test_split(
    X_bin, y_bin, test_size=0.2, random_state=42, stratify=y_bin
)

Xs_train, Xs_test, ys_train, ys_test = train_test_split(
    X_stage, y_stage, test_size=0.2, random_state=42, stratify=y_stage
)

# -------------------------------
# Scaling
# -------------------------------
# Scaler statistics are learned from the training rows only
scaler_bin = StandardScaler()
Xb_train_sc = scaler_bin.fit_transform(Xb_train)
Xb_test_sc  = scaler_bin.transform(Xb_test)

scaler_stage = StandardScaler()
Xs_train_sc = scaler_stage.fit_transform(Xs_train)
Xs_test_sc  = scaler_stage.transform(Xs_test)

# -------------------------------
# Dataset Class
# -------------------------------
class TabularDataset(Dataset):
    """Tensor-backed (features, label) pairs for use with a DataLoader."""

    def __init__(self, X, y):
        # Convert once up front: float32 features, int64 labels
        self.X, self.y = (
            torch.tensor(X, dtype=torch.float32),
            torch.tensor(y, dtype=torch.long),
        )

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return (self.X[idx], self.y[idx])

# -------------------------------
# DataLoader
# -------------------------------
batch_size = 64

# Binary task: shuffled training batches, fixed-order test batches
train_loader_bin = DataLoader(
    TabularDataset(Xb_train_sc, yb_train), shuffle=True, batch_size=batch_size
)
test_loader_bin = DataLoader(
    TabularDataset(Xb_test_sc, yb_test), shuffle=False, batch_size=batch_size
)

# Stage task: same arrangement over the stage splits
train_loader_stage = DataLoader(
    TabularDataset(Xs_train_sc, ys_train), shuffle=True, batch_size=batch_size
)
test_loader_stage = DataLoader(
    TabularDataset(Xs_test_sc, ys_test), shuffle=False, batch_size=batch_size
)

# -------------------------------
# MLP Model
# -------------------------------
class MLP(nn.Module):
    """Three-layer perceptron: input -> 64 -> 64 -> num_classes logits.

    ReLU and dropout (p=0.2) follow each of the two hidden linear layers.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        stack = (
            nn.Linear(input_dim, 64), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(64, 64), nn.ReLU(), nn.Dropout(0.2),
            nn.Linear(64, num_classes),
        )
        # Register layers under their positional names ("0" ... "6")
        self.net = nn.Sequential()
        for position, layer in enumerate(stack):
            self.net.add_module(str(position), layer)

    def forward(self, x):
        return self.net(x)

# -------------------------------
# Training Function
# -------------------------------
# Use the GPU when one is available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def train_model(model, train_loader, val_loader=None, epochs=30, lr=1e-3):
    """Optimise ``model`` with Adam on cross-entropy loss for ``epochs`` passes.

    The model is moved to ``device``. When ``val_loader`` is truthy, accuracy
    on it is computed after every epoch and printed with the mean training
    loss. Returns ``model``.
    """
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(1, epochs + 1):
        model.train()
        # Per-batch summed losses (batch mean multiplied by batch size)
        weighted_losses = []
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            loss = criterion(model(X_batch), y_batch)
            loss.backward()
            optimizer.step()
            weighted_losses.append(loss.item() * X_batch.size(0))
        avg_loss = sum(weighted_losses) / len(train_loader.dataset)

        if val_loader:
            model.eval()
            correct = total = 0
            with torch.no_grad():
                for Xv, yv in val_loader:
                    Xv = Xv.to(device)
                    yv = yv.to(device)
                    matches = model(Xv).argmax(dim=1) == yv
                    correct += matches.sum().item()
                    total += yv.size(0)
            acc = correct / total
            print(f"Epoch {epoch:03d} | Loss: {avg_loss:.4f} | Val Acc: {acc:.4f}")

    return model

# -------------------------------
# Model Initialization
# -------------------------------
# Input width comes from the binary training matrix and is reused for the stage model
input_dim = Xb_train_sc.shape[1]

model_bin   = MLP(input_dim=input_dim, num_classes=2)
model_stage = MLP(input_dim=input_dim, num_classes=np.unique(ys_train).size)

# -------------------------------
# Training
# -------------------------------
# Each model is validated on its own test loader after every epoch
print("Training Binary CKD Model...")
model_bin = train_model(
    model_bin,
    train_loader=train_loader_bin,
    val_loader=test_loader_bin,
    epochs=30,
)

print("Training CKD Stage Model...")
model_stage = train_model(
    model_stage,
    train_loader=train_loader_stage,
    val_loader=test_loader_stage,
    epochs=40,
)


## Evaluation

In [None]:
from sklearn.metrics import classification_report, confusion_matrix




In [None]:

def _predict_labels(model, loader):
    """Return ``(predicted, true)`` label lists gathered over every batch of ``loader``.

    The model is switched to eval mode and run without gradient tracking.
    """
    model.eval()
    predicted, actual = [], []
    with torch.no_grad():
        for features, labels in loader:
            logits = model(features.to(device))
            predicted.extend(logits.argmax(dim=1).cpu().numpy())
            actual.extend(labels.numpy())
    return predicted, actual


def _print_report(title, actual, predicted):
    """Print a titled confusion matrix and classification report."""
    print(title)
    print(confusion_matrix(actual, predicted))
    print(classification_report(actual, predicted, digits=4))


# --------------------------
# Binary CKD
# --------------------------
preds_bin, true_bin = _predict_labels(model_bin, test_loader_bin)
_print_report("=== Binary CKD Prediction ===", true_bin, preds_bin)

# --------------------------
# CKD Stage
# --------------------------
preds_stage, true_stage = _predict_labels(model_stage, test_loader_stage)
_print_report("=== CKD Stage Prediction ===", true_stage, preds_stage)


In [None]:

# --------------------------
# Binary CKD
# --------------------------
# Inference mode, then gather predictions and references batch by batch
model_bin.eval()
preds_bin, true_bin = [], []

with torch.no_grad():
    for Xb, yb in test_loader_bin:
        Xb = Xb.to(device)
        batch_pred = model_bin(Xb).argmax(dim=1)
        preds_bin += list(batch_pred.cpu().numpy())
        true_bin += list(yb.numpy())

print("=== Binary CKD Prediction ===")
print(confusion_matrix(y_true=true_bin, y_pred=preds_bin))
print(classification_report(y_true=true_bin, y_pred=preds_bin, digits=4))

# --------------------------
# CKD Stage
# --------------------------
# Same procedure for the stage model
model_stage.eval()
preds_stage, true_stage = [], []

with torch.no_grad():
    for Xs, ys in test_loader_stage:
        Xs = Xs.to(device)
        batch_pred = model_stage(Xs).argmax(dim=1)
        preds_stage += list(batch_pred.cpu().numpy())
        true_stage += list(ys.numpy())

print("=== CKD Stage Prediction ===")
print(confusion_matrix(y_true=true_stage, y_pred=preds_stage))
print(classification_report(y_true=true_stage, y_pred=preds_stage, digits=4))


## Explainable AI (Optional: SHAP)

In [None]:
import shap

# SHAP DeepExplainer
# The model and every tensor handed to the explainer must live on the same
# device; train_model() may have moved the model to the GPU.
model_bin.eval()
model_device = next(model_bin.parameters()).device

# Use a fixed random subset of the training rows as background: the explainer
# integrates over all background samples, so the full training set is slow.
rng = np.random.default_rng(42)
n_background = min(100, len(Xb_train_sc))
background_idx = rng.choice(len(Xb_train_sc), size=n_background, replace=False)
background = torch.tensor(Xb_train_sc[background_idx], dtype=torch.float32, device=model_device)
test_tensor = torch.tensor(Xb_test_sc, dtype=torch.float32, device=model_device)

explainer = shap.DeepExplainer(model_bin, background)
shap_values = explainer.shap_values(test_tensor)

# Depending on the SHAP version, a two-output model yields either a list with
# one (n_samples, n_features) array per class or a single array with a
# trailing class axis. Keep the attributions for class 1 (the positive label).
if isinstance(shap_values, list):
    shap_values_pos = np.asarray(shap_values[1])
else:
    shap_values_pos = np.asarray(shap_values)
    if shap_values_pos.ndim == 3:
        shap_values_pos = shap_values_pos[:, :, 1]

shap.summary_plot(shap_values_pos, Xb_test_sc, feature_names=FEATURE_COLS)
