In [1]:
# Hospital Readmission Prediction using XGBoost and LIME

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE
import lime
import lime.lime_tabular
import pickle

# Load data
file_path = r"C:\\Users\\alir13\\Downloads\\hospital_readmissions - Copy.csv"
df = pd.read_csv(file_path)

# Encode categorical features
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    if col != 'readmitted':
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le

# Encode target
df['readmitted'] = df['readmitted'].map({'no': 0, 'yes': 1})

# Select features and target
X = df.drop('readmitted', axis=1)
y = df['readmitted']

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply SMOTE to balance
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

# Train model
model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
model.fit(X_train_res, y_train_res)

# Evaluate
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Accuracy:", acc)
print(classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Save model and encoders
with open("xgb_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("label_encoders.pkl", "wb") as f:
    pickle.dump(label_encoders, f)

# LIME explainer
explainer = lime.lime_tabular.LimeTabularExplainer(
    training_data=np.array(X_train),
    feature_names=X.columns,
    class_names=['No Readmission', 'Readmitted'],
    mode='classification'
)

# Save explainer
with open("lime_explainer.pkl", "wb") as f:
    pickle.dump(explainer, f)

print("\nModel training complete and saved.")


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Accuracy: 0.5988
              precision    recall  f1-score   support

           0       0.62      0.62      0.62      2658
           1       0.57      0.58      0.57      2342

    accuracy                           0.60      5000
   macro avg       0.60      0.60      0.60      5000
weighted avg       0.60      0.60      0.60      5000

Confusion Matrix:
 [[1639 1019]
 [ 987 1355]]


AttributeError: Can't get local object 'BaseDiscretizer.__init__.<locals>.<lambda>'

In [2]:
import lime.lime_tabular

# Re-create LIME explainer with correct training data
explainer = lime.lime_tabular.LimeTabularExplainer(
    training_data=np.array(X_train_res),  # SMOTE-resampled data!
    feature_names=X.columns.tolist(),
    class_names=['No Readmission', 'Readmitted'],
    mode='classification'
)

# Save again
with open("lime_explainer.pkl", "wb") as f:
    pickle.dump(explainer, f)

print("LIME explainer saved successfully.")


AttributeError: Can't get local object 'BaseDiscretizer.__init__.<locals>.<lambda>'