In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("../")
from PREDICT import PREDICT
from PREDICT.Models import *
from PREDICT.Metrics import *
from PREDICT.Triggers import *
from PREDICT.Plots import *
from Comparison.Detect_Functions import *
import numpy as np
import pandas as pd
from datetime import timedelta
import datetime
import datetime as dt
import statistics
import matplotlib.pyplot as plt
import seaborn as sns
import arviz as az
import bambi as bmb
import statsmodels.api as sm

import warnings
warnings.filterwarnings('ignore')

%env PYTENSOR_FLAGS=exception_verbosity=high,floatX=float32

env: PYTENSOR_FLAGS=exception_verbosity=high,floatX=float32


In [None]:
recalthreshold = 0.811 # Paper has AUROC of 0.814, with lower CI at 0.811 

# Define the coefficients (hazard ratios converted to log-odds)
coefs = {"White": np.log(1), 
    "Indian": np.log(1.43),
    "Pakistani": np.log(1.8),
    "Bangladeshi": np.log(1.35),
    "Other_Asian": np.log(1.15),
    "Black_Caribbean": np.log(1.08),
    "Black_African": np.log(0.58),
    "Chinese": np.log(0.69),
    "Other": np.log(1.04),
    "Age": np.log(1.66),
    "BMI": np.log(1.08),
    "Townsend": np.log(1.37),
    "SBP": np.log(1.2),
    "CholHDL_ratio": np.log(1.17),
    "Family_CHD": np.log(1.99),
    "Current_smoker": np.log(1.8),
    "Treated_HTN": np.log(1.54),
    "DM": np.log(2.54),
    "RA": np.log(1.5),
    "AF": np.log(3.06),
    "Renal_disease": np.log(1.7),
    "Age_BMI": np.log(0.976),
    "Age_Townsend": np.log(0.938),
    "Age_SBP": np.log(0.966),
    "Age_Family_CHD": np.log(0.927),
    "Age_Smoking": np.log(0.931),
    "Age_Treated_HTN": np.log(0.952),
    "Age_DM": np.log(0.904),
    "Age_AF": np.log(0.858)
}


# Percentage variables (/100)
percent_family_history_chd = 0.126
percent_treated_hypertension = 0.0712
percent_rheumatoid_arthritis = 0.0093
percent_atrial_fibrillation = 0.0035
percent_renal_disease = 0.0016

# Age variable
median_age, IQR_age = 49, 19
mean_age, std_age = median_age, IQR_age / 1.35

# Mean and standard deviation variables
mean_bmi, std_bmi = 33.8, 6.1
mean_townsend, std_townsend = 17.67, 3.534
mean_sbp, std_sbp = 26.6, 4.4
mean_chol_hdl_ratio, std_chol_hdl_ratio = 3.66, 0.144

startDate = pd.to_datetime('01-06-2019', dayfirst=True) # 01-06-2019
endDate = pd.to_datetime('31-12-2021', dayfirst=True) # 31-12-2021
numdays = (endDate - startDate).days


intercept = None
baseline_prob = 0.233 # 23.3%

prev_increases = np.arange(1.0011, 1.003, 0.0002).tolist() # Increase in diabetes prevalence over time STARTS AT 1.0007
smoking_decrease = 0.99  # Decrease in smoking prevalence over time
intercept_change = -0.03  # Change in intercept over time
bias = 0.0

percent_type_2_diabetes = 0.017 # reset these for each start date
percent_current_smoker = 0.228
bayes_dict = {"BayesianCoefficients":{}}
undetected = dict({"Static Threshold": 0, "Regular Testing": 0, "SPC3": 0, "SPC5": 0, "SPC7": 0, "Bayesian": 0})

15

In [None]:
# Pretrain on fake data
numdays_pretrain = 720
num_patients = 500
mydict = {
    'date': list(),
    'outcome': list(),
    'prediction': list(),
    'Age': list(),
    'BMI':list(),
    'Townsend': list(),
    'SBP': list(),
    'CholHDL_ratio': list(),
    'Family_CHD': list(),
    'Current_smoker': list(),
    'Treated_HTN': list(),
    'DM': list(),
    'RA': list(),
    'AF': list(),
    'Renal_disease': list()
}

for i in range(numdays):
    curday = startDate + dt.timedelta(days=i)

    # Generate random factors for patients using min max normalization for non-binary values
    age = np.random.normal(mean_age, std_age, num_patients) 
    age = (age - np.min(age)) / (np.max(age) - np.min(age))
    bmi = np.random.normal(mean_bmi, std_bmi, num_patients)
    bmi = (bmi - np.min(bmi)) / (np.max(bmi) - np.min(bmi))
    townsend = np.random.normal(mean_townsend, std_townsend, num_patients)
    townsend = (townsend - np.min(townsend)) / (np.max(townsend) - np.min(townsend))
    SBP = np.random.normal(mean_sbp, std_sbp, num_patients)
    SBP = (SBP - np.min(SBP)) / (np.max(SBP) - np.min(SBP))
    chol_hdl_ratio = np.random.normal(mean_chol_hdl_ratio, std_chol_hdl_ratio, num_patients)
    chol_hdl_ratio = (chol_hdl_ratio - np.min(chol_hdl_ratio)) / (np.max(chol_hdl_ratio) - np.min(chol_hdl_ratio))
    pat_factors = {"Age": age, 
        "BMI": bmi,
        "Townsend": townsend,
        "SBP": SBP,
        "CholHDL_ratio": chol_hdl_ratio,
        "Family_CHD": np.random.binomial(1, percent_family_history_chd, num_patients),
        "Current_smoker": np.random.binomial(1, percent_current_smoker, num_patients),
        "Treated_HTN": np.random.binomial(1, percent_treated_hypertension, num_patients),
        "DM": np.random.binomial(1, percent_type_2_diabetes, num_patients),
        "RA": np.random.binomial(1, percent_rheumatoid_arthritis, num_patients),
        "AF": np.random.binomial(1, percent_atrial_fibrillation, num_patients),
        "Renal_disease": np.random.binomial(1, percent_renal_disease, num_patients)
    }
    epsilon = np.random.normal(0, 0.2, num_patients) # Simulate error term (mean=0, std=0.2)

    #ethnicity_assignment = select_ethnic_group(num_patients)
    #pat_factors.update(ethnicity_assignment) # combine ethnicity dict with ethnic

    # Calculate baseline log-odds
    #weighted_coef_sum = coefs['White']*pat_factors['White'] + coefs['Indian']*pat_factors['Indian'] + coefs['Pakistani']*pat_factors['Pakistani'] + coefs['Bangladeshi']*pat_factors['Bangladeshi'] 
    #weighted_coef_sum += coefs['Other_Asian']*pat_factors['Other_Asian'] + coefs['Black_Caribbean']*pat_factors['Black_Caribbean'] + coefs['Black_African']*pat_factors['Black_African'] 
    #weighted_coef_sum += coefs['Chinese']*pat_factors['Chinese'] + coefs['Other']*pat_factors['Other'] + 
    weighted_coef_sum = coefs['Age']*(pat_factors['Age']) + coefs['BMI']*(pat_factors['BMI']) 
    weighted_coef_sum += coefs['Townsend']*(pat_factors['Townsend']) + coefs['SBP']*(pat_factors['SBP']) + coefs['CholHDL_ratio']*(pat_factors['CholHDL_ratio']) 
    weighted_coef_sum += coefs["Family_CHD"]*(pat_factors["Family_CHD"]) + coefs["Current_smoker"]*(pat_factors["Current_smoker"]) 
    weighted_coef_sum += coefs["Treated_HTN"]*(pat_factors["Treated_HTN"]) + coefs["DM"]*(pat_factors["DM"]) + coefs["RA"]*(pat_factors["RA"]) 
    weighted_coef_sum += coefs["AF"]*(pat_factors["AF"]) + coefs["Renal_disease"]*(pat_factors["Renal_disease"]) + (coefs["Age_BMI"] * pat_factors["Age"] * pat_factors["BMI"]) 
    weighted_coef_sum += (coefs["Age_Townsend"] * pat_factors["Age"] * pat_factors["Townsend"]) + (coefs["Age_SBP"] * pat_factors["Age"] * pat_factors["SBP"]) 
    weighted_coef_sum += (coefs["Age_Family_CHD"] * pat_factors["Age"] * pat_factors["Family_CHD"]) + (coefs["Age_Smoking"] * pat_factors["Age"] * pat_factors["Current_smoker"]) 
    weighted_coef_sum += (coefs["Age_Treated_HTN"] * pat_factors["Age"] * pat_factors["Treated_HTN"]) + (coefs["Age_DM"] * pat_factors["Age"] * pat_factors["DM"])
    weighted_coef_sum += (coefs["Age_AF"] * pat_factors["Age"] * pat_factors["AF"])

    intercept = np.log(baseline_prob / (1 - baseline_prob))
    
    # Compute log-odds
    lp = intercept + weighted_coef_sum    
    curpredictions = 1 / (1 + np.exp(-lp))  # Convert to probability
    mod_pred = 1 / (1 + np.exp(-(lp + epsilon)))
    curoutcomes = np.random.binomial(1, mod_pred)         
    

    # Append to dictionary from the distribution for each of the variables (Table 1)
    mydict['date'].extend([curday] * num_patients)
    mydict['outcome'].extend(curoutcomes)
    mydict['prediction'].extend(curpredictions)
    mydict['Age'].extend(pat_factors['Age'])
    mydict['BMI'].extend(pat_factors['BMI'])
    mydict['Townsend'].extend(pat_factors['Townsend'])
    mydict['SBP'].extend(pat_factors['SBP'])
    mydict['CholHDL_ratio'].extend(pat_factors['CholHDL_ratio'])
    mydict['Family_CHD'].extend(pat_factors['Family_CHD'])
    mydict['Current_smoker'].extend(pat_factors['Current_smoker'])
    mydict['Treated_HTN'].extend(pat_factors['Treated_HTN'])
    mydict['DM'].extend(pat_factors['DM'])
    mydict['RA'].extend(pat_factors['RA'])
    mydict['AF'].extend(pat_factors['AF'])
    mydict['Renal_disease'].extend(pat_factors['Renal_disease'])


pretrain_data = pd.DataFrame(mydict)
pretrain_data['Age_BMI'] = pretrain_data['Age'] * pretrain_data['BMI']
pretrain_data['Age_Townsend'] = pretrain_data['Age'] * pretrain_data['Townsend']
pretrain_data['Age_SBP'] = pretrain_data['Age'] * pretrain_data['SBP']
pretrain_data['Age_Family_CHD'] = pretrain_data['Age'] * pretrain_data['Family_CHD']
pretrain_data['Age_Smoking'] = pretrain_data['Age'] * pretrain_data['Current_smoker']
pretrain_data['Age_Treated_HTN'] = pretrain_data['Age'] * pretrain_data['Treated_HTN']
pretrain_data['Age_DM'] = pretrain_data['Age'] * pretrain_data['DM']
pretrain_data['Age_AF'] = pretrain_data['Age'] * pretrain_data['AF']

In [10]:
m = sm.Logit(
    pretrain_data['outcome'],
    sm.add_constant(
        pretrain_data[[
            'Age', 'BMI', 'Townsend', 'SBP', 
            'CholHDL_ratio', 'Family_CHD', 'Current_smoker', 'Treated_HTN', 'DM', 
            'RA',  'AF', 'Renal_disease', 'Age_BMI', 'Age_Townsend', 'Age_SBP', 
            'Age_Family_CHD', 'Age_Smoking', 'Age_Treated_HTN', 'Age_DM', 'Age_AF'
        ]]
    )
).fit()

Optimization terminated successfully.
         Current function value: 0.662786
         Iterations 5


In [11]:
m.summary()

0,1,2,3
Dep. Variable:,outcome,No. Observations:,94400.0
Model:,Logit,Df Residuals:,94379.0
Method:,MLE,Df Model:,20.0
Date:,"Tue, 20 Jan 2026",Pseudo R-squ.:,0.02518
Time:,22:06:45,Log-Likelihood:,-62567.0
converged:,True,LL-Null:,-64183.0
Covariance Type:,nonrobust,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-1.1286,0.079,-14.331,0.000,-1.283,-0.974
Age,0.4629,0.141,3.283,0.001,0.186,0.739
BMI,0.1059,0.085,1.242,0.214,-0.061,0.273
Townsend,0.3818,0.085,4.496,0.000,0.215,0.548
SBP,-0.0011,0.085,-0.013,0.990,-0.168,0.165
CholHDL_ratio,0.1312,0.032,4.051,0.000,0.068,0.195
Family_CHD,0.6663,0.053,12.622,0.000,0.563,0.770
Current_smoker,0.6677,0.042,16.054,0.000,0.586,0.749
Treated_HTN,0.3640,0.068,5.315,0.000,0.230,0.498


In [12]:
bayesian_priors = {
    'Intercept': (-1.1286, 0.079),
    'Age': (0.4629, 0.141),
    'BMI': (0.1059, 0.085),
    'Townsend': (0.3818, 0.085),
    'SBP': (-0.0011, 0.085),
    'CholHDL_ratio': (0.1312, 0.032),
    'Family_CHD': (0.6663, 0.053),
    'Current_smoker': (0.6677, 0.042),
    'Treated_HTN': (0.3640, 0.068),
    'DM': (1.1455, 0.141),
    'RA': (0.2935, 0.069),
    'AF': (1.4361, 0.325),
    'Renal_disease': (0.4390, 0.168),
    'Age_BMI': (-0.0690, 0.156),
    'Age_Townsend': (-0.0973, 0.155),
    'Age_SBP': (0.1378, 0.156),
    'Age_Family_CHD': (-0.0798, 0.097),
    'Age_Smoking': (-0.1784, 0.076),
    'Age_Treated_HTN': (0.0887, 0.126),
    'Age_DM': (-0.4446, 0.256),
    'Age_AF': (-0.8560, 0.591)
}

In [13]:
# Get bootstrap OE with CI
preds = m.predict()
outcome = pretrain_data['outcome'].values
for i in range(1000):
    boot_indices = np.random.choice(range(len(outcome)), size=len(outcome), replace=True)
    boot_outcome = outcome[boot_indices]
    boot_preds = preds[boot_indices]
    boot_oe = boot_outcome.mean() / boot_preds.mean()
    if i == 0:
        oe_values = [boot_oe]
    else:
        oe_values.append(boot_oe)
        
print(f"Pretrain OE: {np.mean(oe_values)} with std: {np.std(oe_values)} and 95% CI: {np.percentile(oe_values, 2.5)} - {np.percentile(oe_values, 97.5)}")

Pretrain OE: 0.999930129286512 with std: 0.003742885968726292 and 95% CI: 0.9923591059354785 - 1.0073174599192969


In [None]:
resultsloc = "./Results/simulation/multivariate"
os.makedirs(resultsloc, exist_ok=True)
if not os.path.exists(os.path.join(resultsloc, 'performance_metrics.csv')):
    header = pd.DataFrame(columns=['Time', 'Accuracy', 'AUROC', 'Precision', 'CalibrationSlope', 'CITL',
    'OE', 'AUPRC', 'F1Score', 'impact_or_prev', 'Method', 'Data_Type'])
    header.to_csv(os.path.join(resultsloc, 'performance_metrics.csv'), index=False)

In [None]:
for num, prev_increase in enumerate(prev_increases):
    regular_ttd = []
    static_ttd = []
    spc_ttd3 = []
    spc_ttd5 = []
    spc_ttd7 = []
    bayesian_ttd = []
    mydict = {
            'date': list(),
            'outcome': list(),
            'prediction': list(),
            'Age': list(),
            'BMI':list(),
            'Townsend': list(),
            'SBP': list(),
            'CholHDL_ratio': list(),
            'Family_CHD': list(),
            'Current_smoker': list(),
            'Treated_HTN': list(),
            'DM': list(),
            'RA': list(),
            'AF': list(),
            'Renal_disease': list()
        }


    # Define date range
    numdays = (endDate - startDate).days

    
    for i in range(numdays):
        curday = startDate + dt.timedelta(days=i)
        
        # increase the prevalence of diabetes over time
        if i % 30 == 0:
            percent_type_2_diabetes *= prev_increase # this increases the probability by x% each month
            percent_current_smoker *= smoking_decrease # decrease the prevalence of smoking over time
            bias += intercept_change # change the intercept over time
        if percent_type_2_diabetes < 0 or percent_type_2_diabetes > 1:
            print("Percentage of people with DM", percent_type_2_diabetes)
        if percent_current_smoker < 0 or percent_current_smoker > 1:
            print("Percentage of people who are current smokers", percent_current_smoker)

        # Generate random factors for patients using min max normalization for non-binary values
        age = np.random.normal(mean_age, std_age, num_patients) 
        age = (age - np.min(age)) / (np.max(age) - np.min(age))
        bmi = np.random.normal(mean_bmi, std_bmi, num_patients)
        bmi = (bmi - np.min(bmi)) / (np.max(bmi) - np.min(bmi))
        townsend = np.random.normal(mean_townsend, std_townsend, num_patients)
        townsend = (townsend - np.min(townsend)) / (np.max(townsend) - np.min(townsend))
        SBP = np.random.normal(mean_sbp, std_sbp, num_patients)
        SBP = (SBP - np.min(SBP)) / (np.max(SBP) - np.min(SBP))
        chol_hdl_ratio = np.random.normal(mean_chol_hdl_ratio, std_chol_hdl_ratio, num_patients)
        chol_hdl_ratio = (chol_hdl_ratio - np.min(chol_hdl_ratio)) / (np.max(chol_hdl_ratio) - np.min(chol_hdl_ratio))
        pat_factors = {"Age": age, 
            "BMI": bmi,
            "Townsend": townsend,
            "SBP": SBP,
            "CholHDL_ratio": chol_hdl_ratio,
            "Family_CHD": np.random.binomial(1, percent_family_history_chd, num_patients),
            "Current_smoker": np.random.binomial(1, percent_current_smoker, num_patients),
            "Treated_HTN": np.random.binomial(1, percent_treated_hypertension, num_patients),
            "DM": np.random.binomial(1, percent_type_2_diabetes, num_patients),
            "RA": np.random.binomial(1, percent_rheumatoid_arthritis, num_patients),
            "AF": np.random.binomial(1, percent_atrial_fibrillation, num_patients),
            "Renal_disease": np.random.binomial(1, percent_renal_disease, num_patients)
        }
        epsilon = np.random.normal(0, 0.2, num_patients) # Simulate error term (mean=0, std=0.2)

        #ethnicity_assignment = select_ethnic_group(num_patients)
        #pat_factors.update(ethnicity_assignment) # combine ethnicity dict with ethnic

        # Calculate baseline log-odds
        weighted_coef_sum =  coefs['Age']*(pat_factors['Age']) + coefs['BMI']*(pat_factors['BMI']) 
        weighted_coef_sum += coefs['Townsend']*(pat_factors['Townsend']) + coefs['SBP']*(pat_factors['SBP']) + coefs['CholHDL_ratio']*(pat_factors['CholHDL_ratio']) 
        weighted_coef_sum += coefs["Family_CHD"]*(pat_factors["Family_CHD"]) + coefs["Current_smoker"]*(pat_factors["Current_smoker"]) 
        weighted_coef_sum += coefs["Treated_HTN"]*(pat_factors["Treated_HTN"]) + coefs["DM"]*(pat_factors["DM"]) + coefs["RA"]*(pat_factors["RA"]) 
        weighted_coef_sum += coefs["AF"]*(pat_factors["AF"]) + coefs["Renal_disease"]*(pat_factors["Renal_disease"]) + (coefs["Age_BMI"] * pat_factors["Age"] * pat_factors["BMI"]) 
        weighted_coef_sum += (coefs["Age_Townsend"] * pat_factors["Age"] * pat_factors["Townsend"]) + (coefs["Age_SBP"] * pat_factors["Age"] * pat_factors["SBP"]) 
        weighted_coef_sum += (coefs["Age_Family_CHD"] * pat_factors["Age"] * pat_factors["Family_CHD"]) + (coefs["Age_Smoking"] * pat_factors["Age"] * pat_factors["Current_smoker"]) 
        weighted_coef_sum += (coefs["Age_Treated_HTN"] * pat_factors["Age"] * pat_factors["Treated_HTN"]) + (coefs["Age_DM"] * pat_factors["Age"] * pat_factors["DM"])
        weighted_coef_sum += (coefs["Age_AF"] * pat_factors["Age"] * pat_factors["AF"])

    
        intercept = np.log(baseline_prob / (1 - baseline_prob))
        
        # Compute log-odds
        lp = intercept + weighted_coef_sum  
        curpredictions = 1 / (1 + np.exp(-lp))  # Convert to probability
        mod_pred = 1 / (1 + np.exp(-(lp + epsilon + bias)))
        curoutcomes = np.random.binomial(1, mod_pred)           
        

        # Append to dictionary from the distribution for each of the variables (Table 1)
        mydict['date'].extend([curday] * num_patients)
        mydict['outcome'].extend(curoutcomes)
        mydict['prediction'].extend(curpredictions)
        mydict['Age'].extend(pat_factors['Age'])
        mydict['BMI'].extend(pat_factors['BMI'])
        mydict['Townsend'].extend(pat_factors['Townsend'])
        mydict['SBP'].extend(pat_factors['SBP'])
        mydict['CholHDL_ratio'].extend(pat_factors['CholHDL_ratio'])
        mydict['Family_CHD'].extend(pat_factors['Family_CHD'])
        mydict['Current_smoker'].extend(pat_factors['Current_smoker'])
        mydict['Treated_HTN'].extend(pat_factors['Treated_HTN'])
        mydict['DM'].extend(pat_factors['DM'])
        mydict['RA'].extend(pat_factors['RA'])
        mydict['AF'].extend(pat_factors['AF'])
        mydict['Renal_disease'].extend(pat_factors['Renal_disease'])

        

    df = pd.DataFrame(mydict)
    df['Age_BMI'] = df['Age'] * df['BMI']
    df['Age_Townsend'] = df['Age'] * df['Townsend']
    df['Age_SBP'] = df['Age'] * df['SBP']
    df['Age_Family_CHD'] = df['Age'] * df['Family_CHD']
    df['Age_Smoking'] = df['Age'] * df['Current_smoker']
    df['Age_Treated_HTN'] = df['Age'] * df['Treated_HTN']
    df['Age_DM'] = df['Age'] * df['DM']
    df['Age_AF'] = df['Age'] * df['AF']
    
    df = prevent_constant_variable(df, startDate, endDate)
            
    ########################################### Baseline Testing #######################################
    model = EvaluatePredictions()
    mytest = PREDICT(data=df, model=model, startDate='min', endDate='max', timestep='month')
    mytest.addLogHook(Accuracy(model))
    mytest.addLogHook(AUROC(model))
    mytest.addLogHook(Precision(model))
    mytest.addLogHook(CalibrationSlope(model))
    mytest.addLogHook(CITL(model))
    mytest.addLogHook(OE(model))
    mytest.addLogHook(AUPRC(model))
    mytest.addLogHook(F1Score(model))
    mytest.run()
    log = mytest.getLog()


    baseline_metrics = pd.DataFrame({'Time': list(log["Accuracy"].keys()), 'Accuracy': list(log["Accuracy"].values()), 'AUROC': list(log["AUROC"].values()), 'Precision': list(log["Precision"].values()), 'CalibrationSlope': list(log["CalibrationSlope"].values()), 'CITL': list(log["CITL"].values()), 'OE': list(log["O/E"].values()), 'AUPRC': list(log["AUPRC"].values()), 'impact_or_prev': list([str(prev_increase)] * len(log["Accuracy"])), 'Method':list(['Baseline'] * len(log["Accuracy"]))})
    recalthreshold_lower = 0.999930129286512 - 3 * 0.003742885968726292
    recalthreshold_upper = 0.999930129286512 + 3 * 0.003742885968726292
    print(f"Using OE Threshold of {recalthreshold_lower} - {recalthreshold_upper} for recalibration detection.")
        
    
    ########################################### Save Metrics #######################################
    baseline_metrics["Data_Type"] = "Multivariate Simulation"
    baseline_metrics.to_csv(os.path.join(resultsloc, 'performance_metrics.csv'), mode='a', header=False, index=False)
    
    multivariate_metrics_df = get_metrics_recal_methods(df, percent_type_2_diabetes, recalthreshold_lower, recalthreshold_upper, model_name='QRISK_datasim')
    undetected, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7 = run_recalibration_tests(df, startDate, undetected, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, recalthreshold_lower, recalthreshold_upper)
       
    ########################################### Bayesian Testing #######################################
    bayes_coef_ci = {
        key: (bayesian_priors[key][0] - 3 * bayesian_priors[key][1], bayesian_priors[key][0] + 3 * bayesian_priors[key][1])
        for key in bayesian_priors
    }
    bay_model = BayesianModel(input_data=df, 
                              priors=bayesian_priors,
                              model_formula = "outcome ~ Age + BMI + Townsend + SBP + CholHDL_ratio + Family_CHD + Current_smoker + Treated_HTN + DM + RA + AF + Renal_disease + Age_BMI + Age_Townsend + Age_SBP + Age_Family_CHD + Age_Smoking + Age_Treated_HTN + Age_DM + Age_AF",
                              verbose=False, draws=10000, tune=2000, chains=4, cores=8)
    bay_model.trigger = AlwaysTrigger(bay_model)
    mytest = PREDICT(data=df, model=bay_model, startDate='min', endDate='max', timestep='month')
    mytest.addLogHook(Accuracy(bay_model))
    mytest.addLogHook(AUROC(bay_model))
    mytest.addLogHook(Precision(bay_model))
    mytest.addLogHook(CalibrationSlope(bay_model))
    mytest.addLogHook(CITL(bay_model))
    mytest.addLogHook(OE(bay_model))
    mytest.addLogHook(AUPRC(bay_model))
    mytest.addLogHook(TrackBayesianCoefs(bay_model))
    mytest.addLogHook(F1Score(model))
    mytest.run()
    log = mytest.getLog()

    if "BayesianCoefficients" in log:
        bayes_dict["BayesianCoefficients"].update(log["BayesianCoefficients"])
    
    if "BayesianCoefficients" in log:
        bayes_dict["BayesianCoefficients"].update(log["BayesianCoefficients"])
        print(log["BayesianCoefficients"])
        
    ttd = find_bayes_coef_change(bayes_dict["BayesianCoefficients"], detectDate=startDate, undetected=undetected, thresholds=bayes_coef_ci)
    bayesian_ttd.append(ttd)

    bayes_metrics = pd.DataFrame({'Time': list(log["Accuracy"].keys()), 'Accuracy': list(log["Accuracy"].values()), 'AUROC': list(log["AUROC"].values()), 'Precision': list(log["Precision"].values()), 'CalibrationSlope': list(log["CalibrationSlope"].values()), 'CITL': list(log["CITL"].values()), 'OE': list(log["O/E"].values()), 'AUPRC': list(log["AUPRC"].values()), 'F1Score': list(log["F1score"].values()), 'impact_or_prev': list([str(percent_type_2_diabetes)] * len(log["Accuracy"])), 'Method':list(['Bayesian'] * len(log["Accuracy"]))})
    ########################################### Save Metrics #######################################

    # concatenate all the dataframes into one
    multivariate_metrics_df = pd.concat([multivariate_metrics_df, bayes_metrics], ignore_index=True)
    multivariate_metrics_df["Data_Type"] = "Multivariate Simulation"
    
    multivariate_metrics_df.to_csv(os.path.join(resultsloc, 'performance_metrics.csv'), mode='a', header=False, index=False)
    update_ttd_table(regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, bayesian_ttd, prev_increase, os.path.join(resultsloc, 'multivariate_ttd_tbl.csv'))
    
    # Generate plots
    plot_incidence_over_time(df, None, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, bayesian_ttd, 'multivariate_'+str(prev_increase), fileloc=resultsloc)
    BayesianCoefsPlot(bayes_dict, 'multivariate_'+str(prev_increase), fileloc=resultsloc)
    pd.DataFrame(bayes_dict["BayesianCoefficients"]).to_csv(os.path.join(resultsloc, f'bayesian_coefficients_impact_{prev_increase}.csv'), index=False)

plot_time_to_detect(resultsloc, 'multivariate_ttd_tbl.csv', 'multivariate')