## BAYESIAN REGRESSION


In [None]:
import pandas as pd
import bambi as bmb
import arviz as az
from sklearn.metrics import roc_auc_score

# Load the data
data = pd.read_csv('RF_imputation_NEW.csv')

# Drop the columns that must not be used as predictors
data = data.drop(columns=['deathtime', 'survival_time', 'LOS', 'Unnamed_0', 'V1', 'admittime', 'ID', 'group', 'tLOS', 'subject_id'])

# Every remaining column except the response is a predictor
predictor_columns = data.columns.difference(['outcome'])

# Split the data into training and validation sets BEFORE scaling, so that the
# scaling statistics are not contaminated by validation rows (data leakage).
train_data = data.sample(frac=0.7, random_state=213)
valid_data = data.drop(train_data.index).copy()
train_data = train_data.copy()

# Normalize the predictors using statistics estimated on the training set only
train_mean = train_data[predictor_columns].mean()
train_std = train_data[predictor_columns].std()
train_data[predictor_columns] = (train_data[predictor_columns] - train_mean) / train_std
valid_data[predictor_columns] = (valid_data[predictor_columns] - train_mean) / train_std

# Define and fit the Bayesian logistic regression model using Laplace approximation.
# `fit` returns an ArviZ InferenceData object holding the (approximate) posterior.
model = bmb.Model('outcome ~ ' + ' + '.join(predictor_columns), train_data, family='bernoulli')
fitted_model = model.fit(inference_method="laplace")

# Summarize the model (InferenceData has no `.summary()` method; ArviZ provides it)
print(az.summary(fitted_model))


def _predicted_probability(bambi_model, idata, frame):
    """Return the posterior-mean predicted probability for every row of *frame*.

    Prediction is a method of the Bambi model, not of the InferenceData object.
    `inplace=False` keeps `idata` untouched so it can be reused for other frames.
    """
    predictions = bambi_model.predict(idata, data=frame, inplace=False)
    posterior = predictions.posterior
    # NOTE(review): the response-mean variable is named "outcome_mean" in older
    # Bambi releases and "p" in newer ones -- confirm against the installed version.
    mean_name = "outcome_mean" if "outcome_mean" in posterior else "p"
    # Collapse the posterior draws into a single probability per observation
    return posterior[mean_name].mean(dim=("chain", "draw")).to_numpy()


# Evaluate the model performance on the training set
train_preds_prob = _predicted_probability(model, fitted_model, train_data)
train_auc_value = roc_auc_score(train_data['outcome'], train_preds_prob)
print("Train AUC:", train_auc_value)

# Evaluate the model performance on the validation set
valid_preds_prob = _predicted_probability(model, fitted_model, valid_data)
valid_auc_value = roc_auc_score(valid_data['outcome'], valid_preds_prob)
print("Validation AUC:", valid_auc_value)



In [79]:
pip install pandas numpy pymc3 arviz scikit-learn


Note: you may need to restart the kernel to use updated packages.
