## BAYESIAN REGRESSION


In [None]:
import pandas as pd
import bambi as bmb
import arviz as az
from sklearn.metrics import roc_auc_score

# Load the data
data = pd.read_csv('RF_imputation_NEW.csv')

# Drop unwanted columns; every remaining column except 'outcome' is used as a predictor
data = data.drop(columns=['deathtime', 'survival_time', 'LOS', 'Unnamed_0', 'V1', 'admittime', 'ID', 'group', 'tLOS', 'subject_id'])

predictor_columns = data.columns.difference(['outcome'])

# Split the data into training and validation sets BEFORE scaling, so that no
# validation-set statistics leak into the features the model is trained on.
train_data = data.sample(frac=0.7, random_state=213).copy()
valid_data = data.drop(train_data.index).copy()

# Normalize the predictors using training-set statistics only; the validation
# set is transformed with the same (training) mean and standard deviation.
train_mean = train_data[predictor_columns].mean()
train_std = train_data[predictor_columns].std()
train_data[predictor_columns] = (train_data[predictor_columns] - train_mean) / train_std
valid_data[predictor_columns] = (valid_data[predictor_columns] - train_mean) / train_std

# Define and fit the Bayesian logistic regression model using Laplace approximation.
# `fit` returns an ArviZ InferenceData object, not a fitted-model object.
model = bmb.Model('outcome ~ ' + ' + '.join(predictor_columns), train_data, family='bernoulli')
fitted_model = model.fit(inference_method="laplace")

# Summarize the model (InferenceData has no `.summary()` method; ArviZ provides it)
print(az.summary(fitted_model))


def predict_probability(frame):
    """Return the posterior-mean predicted probability for each row of `frame`.

    Prediction goes through `model.predict`, which needs the InferenceData
    returned by `fit`. `inplace=False` keeps `fitted_model` untouched so the
    training and validation predictions do not overwrite each other.
    """
    predictions = model.predict(fitted_model, data=frame, inplace=False)
    posterior = predictions.posterior
    # NOTE(review): the name of the predicted-mean variable depends on the
    # Bambi version ('outcome_mean' in older releases, 'p' in newer ones) --
    # confirm against the installed version.
    mean_name = 'outcome_mean' if 'outcome_mean' in posterior else 'p'
    return posterior[mean_name].mean(dim=('chain', 'draw')).values


# Evaluate the model performance on the training set
train_preds_prob = predict_probability(train_data)
train_auc_value = roc_auc_score(train_data['outcome'], train_preds_prob)
print("Train AUC:", train_auc_value)

# Evaluate the model performance on the validation set
valid_preds_prob = predict_probability(valid_data)
valid_auc_value = roc_auc_score(valid_data['outcome'], valid_preds_prob)
print("Validation AUC:", valid_auc_value)



In [None]:
# MCMC (Markov chain Monte Carlo) with PyMC3

import pandas as pd
import pymc3 as pm
from sklearn.metrics import roc_auc_score
import numpy as np

# Load the data
data = pd.read_csv('RF_imputation_NEW.csv')

# Drop unwanted columns; every remaining column except 'outcome' is used as a predictor
data = data.drop(columns=['deathtime', 'survival_time', 'LOS', 'Unnamed_0', 'V1', 'admittime', 'ID', 'group', 'tLOS', 'subject_id', 'COPD', 'CHD_with_no_MI'])

predictor_columns = data.columns.difference(['outcome'])

# Split the data into training and validation sets BEFORE scaling, so that no
# validation-set statistics leak into the features the model is trained on.
train_data = data.sample(frac=0.7, random_state=213).copy()
valid_data = data.drop(train_data.index).copy()

# Normalize the predictors using training-set statistics only; the validation
# set is transformed with the same (training) mean and standard deviation.
train_mean = train_data[predictor_columns].mean()
train_std = train_data[predictor_columns].std()
train_data[predictor_columns] = (train_data[predictor_columns] - train_mean) / train_std
valid_data[predictor_columns] = (valid_data[predictor_columns] - train_mean) / train_std

# Convert to numpy arrays for PyMC3
X_train = train_data[predictor_columns].values
y_train = train_data['outcome'].values
X_valid = valid_data[predictor_columns].values
y_valid = valid_data['outcome'].values

with pm.Model() as model:
    # Define the priors
    intercept = pm.Normal('Intercept', mu=0, sigma=1)
    coefs = pm.Normal('coefs', mu=0, sigma=1, shape=X_train.shape[1])

    # Define the likelihood. The per-observation probability is deliberately
    # NOT registered as a Deterministic: that would store one value per
    # training row per draw in the trace and add one row per training
    # observation to the summary table.
    linear_combination = intercept + pm.math.dot(X_train, coefs)
    outcome = pm.Bernoulli('outcome', p=pm.math.sigmoid(linear_combination), observed=y_train)

    # Sample from the posterior (seeded so that reruns are reproducible)
    trace = pm.sample(2000, tune=1000, cores=4, random_seed=213, return_inferencedata=False)

    # Summarize the model parameters
    summary = pm.summary(trace, var_names=['Intercept', 'coefs'])
print(summary)


def posterior_mean_probability(X):
    """Return the posterior-mean predicted probability for each row of `X`.

    The probability is recomputed from the sampled 'Intercept' and 'coefs'
    draws, so it works for any design matrix with the same columns as
    `X_train`. This replaces `pm.sample_posterior_predictive(..., data=...)`,
    which is not a supported keyword, and avoids relying on `X_train`, which
    is fixed inside the model graph as a constant.
    """
    # Shape: (n_draws, n_rows)
    logits = trace['Intercept'][:, None] + trace['coefs'] @ X.T
    # Numerically stable sigmoid: 1 / (1 + exp(-z)) == exp(-log(1 + exp(-z)))
    probabilities = np.exp(-np.logaddexp(0.0, -logits))
    return probabilities.mean(axis=0)


# Predict on training data
train_preds_prob = posterior_mean_probability(X_train)
train_auc_value = roc_auc_score(y_train, train_preds_prob)
print("Train AUC:", train_auc_value)

# Predict on validation data
valid_preds_prob = posterior_mean_probability(X_valid)
valid_auc_value = roc_auc_score(y_valid, valid_preds_prob)
print("Validation AUC:", valid_auc_value)
