In [3]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("../")
from PREDICT import PREDICT
from PREDICT.Models import *
from PREDICT.Metrics import *
from PREDICT.Triggers import *
from PREDICT.Plots import *
from Comparison.Detect_Functions import *
import numpy as np
import pandas as pd
from datetime import timedelta
import datetime
import statistics
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

%env PYTENSOR_FLAGS=exception_verbosity=high#,optimizer=fast_compile

env: PYTENSOR_FLAGS=exception_verbosity=high#,optimizer=fast_compile


In [4]:
startDate = pd.to_datetime('01-06-2019', dayfirst=True) # 01-06-2019
endDate = pd.to_datetime('31-12-2021', dayfirst=True) # 31-12-2021
num_patients = 40 # number of patients per each timestep

## Fast Change - COVID Simulation

In [5]:
recalthreshold = 0.86 # Paper has AUROC of 0.91, with lower CI at 0.86

custom_impacts = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 2.0, 2.5, 3.0] #
switchDateStrings = ['01-04-2020'] # Keep this as just one switchDate as other methods only look at one startDate/deployment date
undetected = dict({"Static Threshold": 0, "Regular Testing": 0, "SPC3": 0, "SPC5":0, "SPC7":0, "Bayesian": 0})

hr_age = 0.5
hr_ldh = 9.8
hr_comorbidity = 3.9

log_age = np.log(hr_age)
log_ldh = np.log(hr_ldh)
log_comorbidity = np.log(hr_comorbidity)

bayes_dict = {"BayesianCoefficients":{}}

for switchDateidx, switchDateString in enumerate(switchDateStrings):
    for custom_impact in custom_impacts:
        regular_ttd = []
        static_ttd = []
        spc_ttd3 = []
        spc_ttd5 = []
        spc_ttd7 = []
        bayesian_ttd = []
        mydict = {
                'date': list(),
                'outcome': list(),
                'prediction': list(),
                'age': list(),
                'sex': list(),
                'comorbidity': list(),
                'ldh_high': list()
            }

        # Define date range and COVID shock periods
        switchDate = pd.to_datetime(switchDateString, dayfirst=True)  # COVID starts spreading
        switchDate2 = pd.to_datetime('01-06-2020', dayfirst=True)  # Peak of the pandemic
        recoveryDate = pd.to_datetime('01-06-2021', dayfirst=True)  # Start of recovery phase
        numdays = (endDate - startDate).days
        switchDays = (switchDate - startDate).days
        switch2Days = (switchDate2 - startDate).days
        recoveryDays = (recoveryDate - startDate).days

        for i in range(numdays):
            curday = startDate + dt.timedelta(days=i)

            age = (np.random.normal(44, 16.3, num_patients) - 44) / 16.3  # Mean age 44 years, std 16.3
            sex = np.random.binomial(1, 0.562, num_patients) # 56.2% are male
            comorbidity = np.random.binomial(1, 0.3, num_patients)  # 30% have comorbidities
            ldh_high = np.random.binomial(1, 0.15, num_patients)  # 15% have LDH >500 U/L
            epsilon = np.random.normal(0, 0.2, num_patients) # Simulate error term (mean=0, std=0.2)

            # Calculate baseline log-odds
            # sex influence 1.2 due to not being provided in the paper
            lp = -1.5 + log_age * age +  log_ldh * ldh_high + log_comorbidity * comorbidity + 1.2 * (sex - 0.562) + epsilon
            curpredictions = 1 / (1 + np.exp(-lp))  # Convert to probability

            # Simulate COVID effects
            if switchDays <= i < switch2Days:
                lp += custom_impact  # Initial impact of COVID
            elif switch2Days <= i < recoveryDays:
                lp += custom_impact + 0.5  # Peak of the pandemic
            elif i >= recoveryDays:
                lp -= 1.0  # Recovery periodâ€”improved health outcomes

            # Generate outcomes
            curoutcomes = np.random.binomial(1, 1 / (1 + np.exp(-lp)))  # Simulate COVID events

            # Append to dictionary
            mydict['date'].extend([curday] * num_patients)
            mydict['outcome'].extend(curoutcomes)
            mydict['prediction'].extend(curpredictions)
            mydict['age'].extend(age)
            mydict['sex'].extend(sex)
            mydict['comorbidity'].extend(comorbidity)
            mydict['ldh_high'].extend(ldh_high)

        df = pd.DataFrame(mydict)

        covid_metrics_df = get_metrics_recal_methods(df, custom_impact, recalthreshold, model_name='COVID_datasim')
        undetected, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7 = run_recalibration_tests(df, switchDate, undetected, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, recalthreshold)

        ########################################### Bayesian Testing #######################################
        bay_model = BayesianModel(input_data=df, priors={"Intercept": (-1, 0.25), "age": (log_age, 0.25), "sex": (1, 0.25), "comorbidity": (log_comorbidity, 0.25), "ldh_high": (log_ldh, 0.25)}, cores=1, verbose=False, draws=1000, tune=250, chains=4)
        bay_model.trigger = TimeframeTrigger(model=bay_model, updateTimestep='month', dataStart=startDate, dataEnd=endDate)
        mytest = PREDICT(data=df, model=bay_model, startDate='min', endDate='max', timestep='month')
        mytest.addLogHook(Accuracy(bay_model))
        mytest.addLogHook(AUROC(bay_model))
        mytest.addLogHook(Precision(bay_model))
        mytest.addLogHook(CalibrationSlope(bay_model))
        mytest.addLogHook(CITL(bay_model))
        mytest.addLogHook(OE(bay_model))
        mytest.addLogHook(AUPRC(bay_model))
        mytest.addLogHook(TrackBayesianCoefs(bay_model))
        mytest.run()
        log = mytest.getLog()

        if "BayesianCoefficients" in log:
            bayes_dict["BayesianCoefficients"].update(log["BayesianCoefficients"])
            print(log["BayesianCoefficients"])
        
        ttd = find_bayes_coef_change(bayes_dict["BayesianCoefficients"], detectDate=switchDate, undetected=undetected, threshold=0.1)
        print(ttd)
        bayesian_ttd.append(ttd)

        bayes_metrics = pd.DataFrame({'Time': list(log["Accuracy"].keys()), 'Accuracy': list(log["Accuracy"].values()), 'AUROC': list(log["AUROC"].values()), 'Precision': list(log["Precision"].values()), 'CalibrationSlope': list(log["CalibrationSlope"].values()), 'CITL': list(log["CITL"].values()), 'OE': list(log["O/E"].values()), 'AUPRC': list(log["AUPRC"].values()), 'impact_or_prev': list([str(custom_impact)] * len(log["Accuracy"])), 'Method':list(['Bayesian'] * len(log["Accuracy"]))})
        
        ########################################### Save Metrics #######################################

        # concatenate all the dataframes into one
        covid_metrics_df = pd.concat([covid_metrics_df, bayes_metrics], ignore_index=True)
        covid_metrics_df["Data_Type"] = "COVID Simulation"

        covid_metrics_df.to_csv('performance_metrics.csv', mode='a', header=False, index=False)

        update_ttd_table(regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, bayesian_ttd, custom_impact, 'covid_ttd_tbl.csv')

        # these two just do the final impact value:
        BayesianCoefsPlot(bayes_dict, f"fast_change_impact_{custom_impact}") 
        plot_prev_over_time(df, switchDateStrings, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, bayesian_ttd, f"fast_change_impact_{custom_impact}")
        

Modeling the probability that outcome==1


No model formula was provided, using standard linear model formula.
Model formula is set to:  outcome~age+sex+comorbidity+ldh_high


Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 20 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 23 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 21 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 26 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 10 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 12 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 10 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 16 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took -58 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 16 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 16 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 16 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 9 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 9 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 9 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 9 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 16 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 9 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 23 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 9 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 9 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 9 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 9 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 23 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 23 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 16 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 9 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 29 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 23 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 19 seconds.


{Timestamp('2019-07-01 00:00:00'): {'Intercept': (-1, 0.25), 'age': (-0.6931471805599453, 0.25), 'sex': (1, 0.25), 'comorbidity': (1.3609765531356006, 0.25), 'ldh_high': (2.2823823856765264, 0.25)}, Timestamp('2019-08-01 00:00:00'): {'Intercept': (-2.0872909030592663, 0.25), 'age': (-0.7054040486315156, 0.25), 'sex': (1.1852678360102757, 0.25), 'comorbidity': (1.2021298074172142, 0.25), 'ldh_high': (2.3029467878473797, 0.25)}, Timestamp('2019-09-01 00:00:00'): {'Intercept': (-2.190047681114543, 0.25), 'age': (-0.7234062869883483, 0.25), 'sex': (1.3269809896452474, 0.25), 'comorbidity': (1.3837480340637778, 0.25), 'ldh_high': (2.262869760952179, 0.25)}, Timestamp('2019-10-01 00:00:00'): {'Intercept': (-2.3091417181727145, 0.25), 'age': (-0.6422718598246111, 0.25), 'sex': (1.2064204277070114, 0.25), 'comorbidity': (1.288624005129687, 0.25), 'ldh_high': (2.462170611913387, 0.25)}, Timestamp('2019-11-01 00:00:00'): {'Intercept': (-2.583932197917488, 0.25), 'age': (-0.7826940012706984, 0.25

FileNotFoundError: [Errno 2] No such file or directory: 'covid_ttd_tbl.csv'

## Outcome Prevalence Change - Diabetes Outcome

In [10]:
recalthreshold = 0.77 # Paper has AUROC of 0.81, with lower CI at 0.77 

prev_increases = np.arange(1.0001, 1.003, 0.0002).tolist()
#prev_increases = np.arange(1.0001, 1.003, 0.0002).tolist()
undetected = dict({"Static Threshold": 0, "Regular Testing": 0, "SPC3": 0, "SPC5": 0, "SPC7": 0, "Bayesian": 0})
bayes_dict = {"BayesianCoefficients":{}}
# coefficients from non-laboratory logistic regression model
age_at_lv_coef = 0.16 # lv = last visit
bmi_coef = 0.68
hip_circ_coef = -0.05
sex_coef = -0.14
height_coef = -0.15
waist_circ_coef = 0.31
waist_hips_ratio_coef = 0.54
weight_coef = 0.03
time_between_visits_coef = 0.38
bias_coef = -0.74

# mean and standard deviation for each predictor
# variable at the last visit is used
mean_age, std_age = 62.9, 7.5
mean_bmi, std_bmi = 26.6, 4.4
mean_hip_circ, std_hip_circ = 101.6, 8.8
perc_male, mean_height, std_height = 0.478, 169, 9.2
mean_waist_circ, std_waist_circ = 88.7, 12.7
mean_weight, std_weight = 76.2, 15.2
mean_time_between_visits, std_time_between_visits = 7.3, 2.3

mean_waist_hips_ratio = mean_waist_circ / mean_hip_circ
std_waist_hips_ratio = mean_waist_hips_ratio * np.sqrt(
    (std_waist_circ / mean_waist_circ) ** 2 + (std_hip_circ / mean_hip_circ) ** 2)

dm_prev = 0.07  # Initial diabetes prevalence = 7%
for prev_increase in prev_increases:
    regular_ttd = []
    static_ttd = []
    spc_ttd3 = []
    spc_ttd5 = []
    spc_ttd7 = []
    bayesian_ttd = []
    mydict = {
            'date': list(),
            'outcome': list(),
            'prediction': list(),
            'age': list(),
            'bmi':list(),
            'hip_circ': list(),
            'sex': list(),
            'height': list(),
            'waist_circ': list(),
            'waist_hips_ratio': list(),
            'weight': list(),
            'time_between_visits': list()
        }

    num_patients = 60
    numdays = (endDate - startDate).days
    
    for i in range(numdays):
        curday = startDate + dt.timedelta(days=i)

        age = np.random.normal(mean_age, std_age, num_patients)
        # min max normalisation
        age = (age - np.min(age)) / (np.max(age) - np.min(age))  # Normalize age to [0, 1]

        bmi = np.random.normal(mean_bmi, std_bmi, num_patients) 
        bmi = (bmi - np.min(bmi)) / (np.max(bmi) - np.min(bmi))  # Normalize BMI to [0, 1]

        hip_circ = np.random.normal(mean_hip_circ, std_hip_circ, num_patients)
        hip_circ = (hip_circ - np.min(hip_circ)) / (np.max(hip_circ) - np.min(hip_circ))

        height = np.random.normal(mean_height, std_height, num_patients)
        height = (height - np.min(height)) / (np.max(height) - np.min(height))  # Normalize height to [0, 1]

        waist_circ = np.random.normal(mean_waist_circ, std_waist_circ, num_patients)
        waist_circ = (waist_circ - np.min(waist_circ)) / (np.max(waist_circ) - np.min(waist_circ))  # Normalize waist circumference to [0, 1]

        waist_hips_ratio = np.random.normal(mean_waist_hips_ratio, std_waist_hips_ratio, num_patients)
        waist_hips_ratio = (waist_hips_ratio - np.min(waist_hips_ratio)) / (np.max(waist_hips_ratio) - np.min(waist_hips_ratio))  # Normalize waist-hips ratio to [0, 1]

        weight = np.random.normal(mean_weight, std_weight, num_patients)
        weight = (weight - np.min(weight)) / (np.max(weight) - np.min(weight))  # Normalize weight to [0, 1]

        time_between_visits = np.random.normal(mean_time_between_visits, std_time_between_visits, num_patients)
        time_between_visits = (time_between_visits - np.min(time_between_visits)) / (np.max(time_between_visits) - np.min(time_between_visits))  # Normalize time between visits to [0, 1]

        sex = np.random.binomial(1, perc_male, num_patients)

        epsilon = np.random.normal(0, 0.2, num_patients) # Simulate error term (mean=0, std=0.2)
        

        # Calculate baseline log-odds
        lp = bias_coef + age_at_lv_coef * age + bmi_coef * bmi + hip_circ_coef * hip_circ + sex_coef * (sex - perc_male) + height_coef * height + waist_circ_coef * waist_circ  + waist_hips_ratio_coef * waist_hips_ratio + weight_coef * weight  + time_between_visits_coef * time_between_visits + epsilon
        
        curpredictions = 1 / (1 + np.exp(-lp))  # Convert to probability

        # Generate outcomes to simulate diabetes rates increasing over time
        if i % 30 == 0:
            dm_prev *= prev_increase # this increases the probability by x% each month

        mod_prob = 1/(1+np.exp(-(lp + dm_prev)))
        # intercept changed, but model weights constant
        # diabetes increased as outcome, but not explained by data
        curoutcomes = np.random.binomial(1, mod_prob)           
        

        # Append to dictionary from the distribution for each of the variables (Table 1)
        mydict['date'].extend([curday] * num_patients)
        mydict['outcome'].extend(curoutcomes)
        mydict['prediction'].extend(curpredictions)
        mydict['age'].extend(age)
        mydict['bmi'].extend(bmi)
        mydict['hip_circ'].extend(hip_circ)
        mydict['sex'].extend(sex)
        mydict['height'].extend(height)
        mydict['waist_circ'].extend(waist_circ)
        mydict['waist_hips_ratio'].extend(waist_hips_ratio)
        mydict['weight'].extend(weight)
        mydict['time_between_visits'].extend(time_between_visits)
        

    df = pd.DataFrame(mydict)  
    out_prev_metrics_df = get_metrics_recal_methods(df, dm_prev, recalthreshold, model_name='Outcome_prev_datasim')
    undetected, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7 = run_recalibration_tests(df, startDate, undetected, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, recalthreshold)
    
    
    ########################################### Bayesian Testing #######################################
    bay_model = BayesianModel(input_data=df, priors={"Intercept": (bias_coef, 0.25), "age": (age_at_lv_coef, 0.25), "bmi": (bmi_coef, 0.25), "hip_circ": (hip_circ_coef, 0.25),
                                                "sex": (sex_coef, 0.25), "height":(height_coef, 0.25), "waist_circ":(waist_circ_coef, 0.25),
                                                "waist_hips_ratio":(waist_hips_ratio_coef, 0.25), "weight":(weight_coef, 0.25), 
                                                "time_between_visits":(time_between_visits_coef, 0.25)}, cores=1, verbose=False)
    bay_model.trigger = TimeframeTrigger(model=bay_model, updateTimestep='month', dataStart=startDate, dataEnd=endDate)
    mytest = PREDICT(data=df, model=bay_model, startDate='min', endDate='max', timestep='month')
    mytest.addLogHook(Accuracy(bay_model))
    mytest.addLogHook(AUROC(bay_model))
    mytest.addLogHook(Precision(bay_model))
    mytest.addLogHook(CalibrationSlope(bay_model))
    mytest.addLogHook(CITL(bay_model))
    mytest.addLogHook(OE(bay_model))
    mytest.addLogHook(AUPRC(bay_model))
    mytest.addLogHook(TrackBayesianCoefs(bay_model))
    mytest.run()
    log = mytest.getLog()

    if "BayesianCoefficients" in log:
        bayes_dict["BayesianCoefficients"].update(log["BayesianCoefficients"])
    
    ttd = find_bayes_coef_change(bayes_dict["BayesianCoefficients"], detectDate=startDate, undetected=undetected, threshold=0.1)
    bayesian_ttd.append(ttd)

    bayes_metrics = pd.DataFrame({'Time': list(log["Accuracy"].keys()), 'Accuracy': list(log["Accuracy"].values()), 'AUROC': list(log["AUROC"].values()), 'Precision': list(log["Precision"].values()), 'CalibrationSlope': list(log["CalibrationSlope"].values()), 'CITL': list(log["CITL"].values()), 'OE': list(log["O/E"].values()), 'AUPRC': list(log["AUPRC"].values()), 'impact_or_prev': list([str(dm_prev)] * len(log["Accuracy"])), 'Method':list(['Bayesian'] * len(log["Accuracy"]))})
    
    ########################################### Save Metrics #######################################

    # concatenate all the dataframes into one
    out_prev_metrics_df = pd.concat([out_prev_metrics_df, bayes_metrics], ignore_index=True)
    out_prev_metrics_df["Data_Type"] = "Outcome Prevalence Simulation"

    out_prev_metrics_df.to_csv('performance_metrics.csv', mode='a', header=False, index=False)

    update_ttd_table(regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, bayesian_ttd, prev_increase, 'output_prev_ttd_tbl.csv')
    
    # Generate plots
    plot_prev_over_time(df, None, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, bayesian_ttd, 'outcome_prev'+str(prev_increase))
    BayesianCoefsPlot(bayes_dict, 'outcome_prev'+str(prev_increase))

plot_time_to_detect('output_prev_ttd_tbl.csv', 'outcome_prev')



Modeling the probability that outcome==1


No model formula was provided, using standard linear model formula.
Model formula is set to:  outcome~age+bmi+hip_circ+sex+height+waist_circ+waist_hips_ratio+weight+time_between_visits


Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 28 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 34 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 25 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 41 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 42 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 31 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 42 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 22 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 21 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 22 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 34 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 15 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 41 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 22 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 22 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 43 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 23 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 28 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 15 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 35 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 27 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 21 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 15 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 15 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 28 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 28 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 42 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 18 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 22 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 22 seconds.


Significant change detected in coefficient 'age' from 0.16 to 0.02471907254637529 at timestamp 2019-08-01 00:00:00
Significant change detected in coefficient 'Intercept' from -0.7664970581859325 to -0.46252747889522244 at timestamp 2019-09-01 00:00:00
Significant change detected in coefficient 'Intercept' from -0.46252747889522244 to -0.6665574072363775 at timestamp 2019-10-01 00:00:00
Significant change detected in coefficient 'age' from 0.2736725141780353 to 0.200648462736613 at timestamp 2019-11-01 00:00:00
Significant change detected in coefficient 'Intercept' from -0.6118008768880094 to -0.412605264469118 at timestamp 2019-12-01 00:00:00
Significant change detected in coefficient 'Intercept' from -0.412605264469118 to -0.5718176771215492 at timestamp 2020-01-01 00:00:00
Significant change detected in coefficient 'age' from 0.18620975532715034 to 0.30557643909227555 at timestamp 2020-02-01 00:00:00
Significant change detected in coefficient 'Intercept' from -0.5879711922366372 to -

Modeling the probability that outcome==1


No model formula was provided, using standard linear model formula.
Model formula is set to:  outcome~age+bmi+hip_circ+sex+height+waist_circ+waist_hips_ratio+weight+time_between_visits


Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 28 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 41 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 15 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 21 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 3 chains for 250 tune and 1_000 draw iterations (750 + 3_000 draws total) took 36 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, age, bmi, hip_circ, sex, height, waist_circ, waist_hips_ratio, weight, time_between_visits]


Output()

Sampling 3 chains for 250 tune and 1_000 draw iterations (750 + 3_000 draws total) took 27 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics


KeyboardInterrupt: 

## Slow change data simulation - Diabetes as a Predictor (increasing over time) with CKD as the predicted outcome.

In [13]:
recalthreshold = 0.851 # Paper has AUROC of 0.889, with lower CI at 0.851

prev_increases = np.arange(1.0001, 1.0030, 0.0002).tolist() #[1.0001] 
undetected = dict({"Static Threshold": 0, "Regular Testing": 0, "SPC3": 0, "SPC5": 0, "SPC7": 0, "Bayesian": 0})
bayes_dict = {"BayesianCoefficients":{}}

mean_TGFB, std_TGFB = 13.23, 5.18
mean_ADMA, std_ADMA= 101.1, 64.8
mean_BUN, std_BUN = 5.45, 1.11
mean_age, std_age = 63.27, 10.09 

TGFB_coef = 1.84
ADMA_coef = 1.137
DM_coef = 0.84
BUN_coef = 0.497
elderly_coef = 0.603

perc_dm = 0.05 # 5.5%
for prev_increase in prev_increases:
    regular_ttd = []
    static_ttd = []
    spc_ttd3 = []
    spc_ttd5 = []
    spc_ttd7 = []
    bayesian_ttd = []
    mydict = {
            'date': list(),
            'outcome': list(),
            'prediction': list(),
            'TGFB': list(),
            'ADMA':list(),
            'DM': list(),
            'BUN': list(),
            'elderly': list()
        }

    num_patients = 60

    numdays = (endDate - startDate).days

    for i in range(numdays):
        curday = startDate + dt.timedelta(days=i)

        # increase the prevalence of diabetes over time
        if i % 30 == 0:
            perc_dm *= prev_increase # this increases the probability by x% each month

        TGFB = get_binom_from_normal(mean_TGFB, std_TGFB, num_patients, 1.011)
        ADMA = get_binom_from_normal(mean_ADMA, std_ADMA, num_patients, 0.019)
        DM = np.random.binomial(1, perc_dm, num_patients)
        BUN = get_binom_from_normal(mean_BUN, std_BUN, num_patients, 5.9)
        elderly = get_binom_from_normal(mean_age, std_age, num_patients, 60)
        epsilon = np.random.normal(0, 0.2, num_patients) # Simulate error term (mean=0, std=0.2)

        # Calculate baseline log-odds
        # non_genetic_risk_score_model from paper
        lp = TGFB_coef * TGFB + ADMA_coef * ADMA + DM_coef * DM + BUN_coef * BUN + elderly_coef * elderly + epsilon

        curpredictions = 1 / (1 + np.exp(-lp))  # Convert to probability
        curoutcomes = np.random.binomial(1, curpredictions)           
        
        # Append to dictionary from the distribution for each of the variables (Table 1)
        mydict['date'].extend([curday] * num_patients)
        mydict['outcome'].extend(curoutcomes)
        mydict['prediction'].extend(curpredictions)
        mydict['TGFB'].extend(TGFB)
        mydict['ADMA'].extend(ADMA)
        mydict['DM'].extend(DM)
        mydict['BUN'].extend(BUN)
        mydict['elderly'].extend(elderly)
        

    df = pd.DataFrame(mydict)
    slow_change_metrics_df = get_metrics_recal_methods(df, perc_dm, recalthreshold, model_name='slow_change_datasim')
    undetected, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7 = run_recalibration_tests(df, startDate, undetected, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, recalthreshold)
    

    ########################################### Bayesian Testing #######################################
    bay_model = BayesianModel(input_data=df, priors={"Intercept": (-1, 0.25), "TGFB": (TGFB_coef, 0.25), "ADMA": (ADMA_coef, 0.25), "DM": (DM_coef, 0.25), "BUN": (BUN_coef, 0.25),
                                                "elderly": (elderly_coef, 0.25)}, cores=1, verbose=False)
    bay_model.trigger = TimeframeTrigger(model=bay_model, updateTimestep='month', dataStart=startDate, dataEnd=endDate)
    mytest = PREDICT(data=df, model=bay_model, startDate='min', endDate='max', timestep='month')
    mytest.addLogHook(Accuracy(bay_model))
    mytest.addLogHook(AUROC(bay_model))
    mytest.addLogHook(Precision(bay_model))
    mytest.addLogHook(CalibrationSlope(bay_model))
    mytest.addLogHook(CITL(bay_model))
    mytest.addLogHook(OE(bay_model))
    mytest.addLogHook(AUPRC(bay_model))
    mytest.addLogHook(TrackBayesianCoefs(bay_model))
    mytest.run()
    log = mytest.getLog()

    if "BayesianCoefficients" in log:
        bayes_dict["BayesianCoefficients"].update(log["BayesianCoefficients"])
    
    ttd = find_bayes_coef_change(bayes_dict["BayesianCoefficients"], detectDate=startDate, undetected=undetected, threshold=0.1)
    bayesian_ttd.append(ttd)

    bayes_metrics = pd.DataFrame({'Time': list(log["Accuracy"].keys()), 'Accuracy': list(log["Accuracy"].values()), 'AUROC': list(log["AUROC"].values()), 'Precision': list(log["Precision"].values()), 'CalibrationSlope': list(log["CalibrationSlope"].values()), 'CITL': list(log["CITL"].values()), 'OE': list(log["O/E"].values()), 'AUPRC': list(log["AUPRC"].values()), 'impact_or_prev': list([str(perc_dm)] * len(log["Accuracy"])), 'Method':list(['Bayesian'] * len(log["Accuracy"]))})
    
    ########################################### Save Metrics #######################################

    # concatenate all the dataframes into one
    slow_change_metrics_df = pd.concat([slow_change_metrics_df, bayes_metrics], ignore_index=True)
    slow_change_metrics_df["Data_Type"] = "Slow Change Simulation"

    slow_change_metrics_df.to_csv('performance_metrics.csv', mode='a', header=False, index=False)

    update_ttd_table(regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, bayesian_ttd, prev_increase, 'input_prev_ttd_tbl.csv')
    
    # Generate plots
    plot_prev_over_time(df, None, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, bayesian_ttd, 'slow_change_'+str(prev_increase))
    BayesianCoefsPlot(bayes_dict, 'slow_change_'+str(prev_increase))

plot_time_to_detect('input_prev_ttd_tbl.csv', 'slow_change')


Modeling the probability that outcome==1


No model formula was provided, using standard linear model formula.
Model formula is set to:  outcome~TGFB+ADMA+DM+BUN+elderly


Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 32 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 33 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 12 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 31 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 18 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 32 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 19 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 26 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 25 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 12 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 11 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 12 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 19 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 15 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 25 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 25 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 25 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 14 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 23 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 25 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 18 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 19 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 18 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 12 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 19 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 12 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 32 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 25 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 25 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 18 seconds.


Significant change detected in coefficient 'Intercept' from -1 to -0.36911698201067233 at timestamp 2019-08-01 00:00:00
Significant change detected in coefficient 'ADMA' from 0.9744110456187677 to 1.1038179756744557 at timestamp 2019-09-01 00:00:00
Significant change detected in coefficient 'Intercept' from -0.341791553034366 to -0.08509903094876432 at timestamp 2019-10-01 00:00:00
Significant change detected in coefficient 'Intercept' from -0.08509903094876432 to 0.27390022454206214 at timestamp 2019-11-01 00:00:00
Significant change detected in coefficient 'Intercept' from 0.27390022454206214 to 0.4814902452662598 at timestamp 2019-12-01 00:00:00
Significant change detected in coefficient 'Intercept' from 0.4814902452662598 to 0.6504129385200659 at timestamp 2020-01-01 00:00:00
Significant change detected in coefficient 'Intercept' from 0.6504129385200659 to 0.3602555058879249 at timestamp 2020-02-01 00:00:00
Significant change detected in coefficient 'Intercept' from 0.3602555058879

Modeling the probability that outcome==1


No model formula was provided, using standard linear model formula.
Model formula is set to:  outcome~TGFB+ADMA+DM+BUN+elderly


Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 25 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 12 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 12 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 18 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 19 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 12 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 39 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, TGFB, ADMA, DM, BUN, elderly]


Output()

Sampling 3 chains for 250 tune and 1_000 draw iterations (750 + 3_000 draws total) took 32 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...


KeyboardInterrupt: 

## Multivariate Model - QRISK2 - Diabetes increasing whilst smoking is decreasing.

In [15]:
recalthreshold = 0.811 # Paper has AUROC of 0.814, with lower CI at 0.811 

# Define the coefficients (hazard ratios converted to log-odds)
coefs = {"White": np.log(1), 
    "Indian": np.log(1.43),
    "Pakistani": np.log(1.8),
    "Bangladeshi": np.log(1.35),
    "Other_Asian": np.log(1.15),
    "Black_Caribbean": np.log(1.08),
    "Black_African": np.log(0.58),
    "Chinese": np.log(0.69),
    "Other": np.log(1.04),
    "Age": np.log(1.66),
    "BMI": np.log(1.08),
    "Townsend": np.log(1.37),
    "SBP": np.log(1.2),
    "CholHDL_ratio": np.log(1.17),
    "Family_CHD": np.log(1.99),
    "Current_smoker": np.log(1.8),
    "Treated_HTN": np.log(1.54),
    "DM": np.log(2.54),
    "RA": np.log(1.5),
    "AF": np.log(3.06),
    "Renal_disease": np.log(1.7),
    "Age_BMI": np.log(0.976),
    "Age_Townsend": np.log(0.938),
    "Age_SBP": np.log(0.966),
    "Age_Family_CHD": np.log(0.927),
    "Age_Smoking": np.log(0.931),
    "Age_Treated_HTN": np.log(0.952),
    "Age_DM": np.log(0.904),
    "Age_AF": np.log(0.858)
}


# Percentage variables (/100)
percent_family_history_chd = 0.126
percent_treated_hypertension = 0.0712
percent_rheumatoid_arthritis = 0.0093
percent_atrial_fibrillation = 0.0035
percent_renal_disease = 0.0016

# Age variable
median_age, IQR_age = 49, 19
mean_age, std_age = median_age, IQR_age / 1.35

# Mean and standard deviation variables
mean_bmi, std_bmi = 33.8, 6.1
mean_townsend, std_townsend = 17.67, 3.534
mean_sbp, std_sbp = 26.6, 4.4
mean_chol_hdl_ratio, std_chol_hdl_ratio = 3.66, 0.144


intercept = None
baseline_prob = 0.233 # 23.3%

prev_increases = np.arange(1.0007, 1.003, 0.0002).tolist() # Increase in diabetes prevalence over time STARTS AT 1.0007
smoking_decrease = np.arange(0.9995, 0.9973, -0.0002).tolist()  # Decrease in smoking prevalence over time
undetected = dict({"Static Threshold": 0, "Regular Testing": 0, "SPC3": 0, "SPC5": 0, "SPC7": 0, "Bayesian": 0})
bayes_dict = {"BayesianCoefficients":{}}

percent_type_2_diabetes = 0.017 # reset these for each start date
percent_current_smoker = 0.228
for num, prev_increase in enumerate(prev_increases):
    regular_ttd = []
    static_ttd = []
    spc_ttd3 = []
    spc_ttd5 = []
    spc_ttd7 = []
    bayesian_ttd = []
    mydict = {
            'date': list(),
            'outcome': list(),
            'prediction': list(),
            'White': list(),
            'Indian': list(),
            'Pakistani': list(),
            'Bangladeshi': list(),
            'Other_Asian': list(),
            'Black_Caribbean': list(),
            'Black_African': list(),
            'Chinese': list(),
            'Other': list(),
            'Age': list(),
            'BMI':list(),
            'Townsend': list(),
            'SBP': list(),
            'CholHDL_ratio': list(),
            'Family_CHD': list(),
            'Current_smoker': list(),
            'Treated_HTN': list(),
            'DM': list(),
            'RA': list(),
            'AF': list(),
            'Renal_disease': list()
        }


    # Define date range
    numdays = (endDate - startDate).days

    
    for i in range(numdays):
        curday = startDate + dt.timedelta(days=i)

        # increase the prevalence of diabetes over time
        if i % 30 == 0:
            percent_type_2_diabetes *= prev_increase # this increases the probability by x% each month
            percent_current_smoker *= smoking_decrease[num] # decrease the prevalence of smoking over time
        if percent_type_2_diabetes < 0 or percent_type_2_diabetes > 1:
            print("Percentage of people with DM", percent_type_2_diabetes)
        if percent_current_smoker < 0 or percent_current_smoker > 1:
            print("Percentage of people who are current smokers", percent_current_smoker)

        # Generate random factors for patients using min max normalization for non-binary values
        age = np.random.normal(mean_age, std_age, num_patients) 
        age = (age - np.min(age)) / (np.max(age) - np.min(age))
        bmi = np.random.normal(mean_bmi, std_bmi, num_patients)
        bmi = (bmi - np.min(bmi)) / (np.max(bmi) - np.min(bmi))
        townsend = np.random.normal(mean_townsend, std_townsend, num_patients)
        townsend = (townsend - np.min(townsend)) / (np.max(townsend) - np.min(townsend))
        SBP = np.random.normal(mean_sbp, std_sbp, num_patients)
        SBP = (SBP - np.min(SBP)) / (np.max(SBP) - np.min(SBP))
        chol_hdl_ratio = np.random.normal(mean_chol_hdl_ratio, std_chol_hdl_ratio, num_patients)
        chol_hdl_ratio = (chol_hdl_ratio - np.min(chol_hdl_ratio)) / (np.max(chol_hdl_ratio) - np.min(chol_hdl_ratio))
        pat_factors = {"Age": age, 
            "BMI": bmi,
            "Townsend": townsend,
            "SBP": SBP,
            "CholHDL_ratio": chol_hdl_ratio,
            "Family_CHD": np.random.binomial(1, percent_family_history_chd, num_patients),
            "Current_smoker": np.random.binomial(1, percent_current_smoker, num_patients),
            "Treated_HTN": np.random.binomial(1, percent_treated_hypertension, num_patients),
            "DM": np.random.binomial(1, percent_type_2_diabetes, num_patients),
            "RA": np.random.binomial(1, percent_rheumatoid_arthritis, num_patients),
            "AF": np.random.binomial(1, percent_atrial_fibrillation, num_patients),
            "Renal_disease": np.random.binomial(1, percent_renal_disease, num_patients)
        }
        epsilon = np.random.normal(0, 0.2, num_patients) # Simulate error term (mean=0, std=0.2)

        ethnicity_assignment = select_ethnic_group(num_patients)
        pat_factors.update(ethnicity_assignment) # combine ethnicity dict with ethnic

        # Calculate baseline log-odds
        weighted_coef_sum = coefs['White']*pat_factors['White'] + coefs['Indian']*pat_factors['Indian'] + coefs['Pakistani']*pat_factors['Pakistani'] + coefs['Bangladeshi']*pat_factors['Bangladeshi'] 
        weighted_coef_sum += coefs['Other_Asian']*pat_factors['Other_Asian'] + coefs['Black_Caribbean']*pat_factors['Black_Caribbean'] + coefs['Black_African']*pat_factors['Black_African'] 
        weighted_coef_sum += coefs['Chinese']*pat_factors['Chinese'] + coefs['Other']*pat_factors['Other'] + coefs['Age']*(pat_factors['Age']) + coefs['BMI']*(pat_factors['BMI']) 
        weighted_coef_sum += coefs['Townsend']*(pat_factors['Townsend']) + coefs['SBP']*(pat_factors['SBP']) + coefs['CholHDL_ratio']*(pat_factors['CholHDL_ratio']) 
        weighted_coef_sum += coefs["Family_CHD"]*(pat_factors["Family_CHD"]) + coefs["Current_smoker"]*(pat_factors["Current_smoker"]) 
        weighted_coef_sum += coefs["Treated_HTN"]*(pat_factors["Treated_HTN"]) + coefs["DM"]*(pat_factors["DM"]) + coefs["RA"]*(pat_factors["RA"]) 
        weighted_coef_sum += coefs["AF"]*(pat_factors["AF"]) + coefs["Renal_disease"]*(pat_factors["Renal_disease"]) + (coefs["Age_BMI"] * pat_factors["Age"] * pat_factors["BMI"]) 
        weighted_coef_sum += (coefs["Age_Townsend"] * pat_factors["Age"] * pat_factors["Townsend"]) + (coefs["Age_SBP"] * pat_factors["Age"] * pat_factors["SBP"]) 
        weighted_coef_sum += (coefs["Age_Family_CHD"] * pat_factors["Age"] * pat_factors["Family_CHD"]) + (coefs["Age_Smoking"] * pat_factors["Age"] * pat_factors["Current_smoker"]) 
        weighted_coef_sum += (coefs["Age_Treated_HTN"] * pat_factors["Age"] * pat_factors["Treated_HTN"]) + (coefs["Age_DM"] * pat_factors["Age"] * pat_factors["DM"])
        weighted_coef_sum += (coefs["Age_AF"] * pat_factors["Age"] * pat_factors["AF"]) + epsilon

    
        intercept = np.log(baseline_prob / (1 - baseline_prob))
        
        # Compute log-odds
        lp = intercept + weighted_coef_sum
        lp = np.clip(lp, -500, 500)  # Clip to avoid overflow issues
        
        curpredictions = 1 / (1 + np.exp(-lp))  # Convert to probability
        
        
        curoutcomes = np.random.binomial(1, curpredictions)         
        

        # Append to dictionary from the distribution for each of the variables (Table 1)
        mydict['date'].extend([curday] * num_patients)
        mydict['outcome'].extend(curoutcomes)
        mydict['prediction'].extend(curpredictions)
        mydict['White'].extend(pat_factors['White'])
        mydict['Indian'].extend(pat_factors['Indian'])
        mydict['Pakistani'].extend(pat_factors['Pakistani'])
        mydict['Bangladeshi'].extend(pat_factors['Bangladeshi'])
        mydict['Other_Asian'].extend(pat_factors['Other_Asian'])
        mydict['Black_Caribbean'].extend(pat_factors['Black_Caribbean'])
        mydict['Black_African'].extend(pat_factors['Black_African'])
        mydict['Chinese'].extend(pat_factors['Chinese'])
        mydict['Other'].extend(pat_factors['Other'])
        mydict['Age'].extend(pat_factors['Age'])
        mydict['BMI'].extend(pat_factors['BMI'])
        mydict['Townsend'].extend(pat_factors['Townsend'])
        mydict['SBP'].extend(pat_factors['SBP'])
        mydict['CholHDL_ratio'].extend(pat_factors['CholHDL_ratio'])
        mydict['Family_CHD'].extend(pat_factors['Family_CHD'])
        mydict['Current_smoker'].extend(pat_factors['Current_smoker'])
        mydict['Treated_HTN'].extend(pat_factors['Treated_HTN'])
        mydict['DM'].extend(pat_factors['DM'])
        mydict['RA'].extend(pat_factors['RA'])
        mydict['AF'].extend(pat_factors['AF'])
        mydict['Renal_disease'].extend(pat_factors['Renal_disease'])

        

    df = pd.DataFrame(mydict)
    df['Age_BMI'] = df['Age'] * df['BMI']
    df['Age_Townsend'] = df['Age'] * df['Townsend']
    df['Age_SBP'] = df['Age'] * df['SBP']
    df['Age_Family_CHD'] = df['Age'] * df['Family_CHD']
    df['Age_Smoking'] = df['Age'] * df['Current_smoker']
    df['Age_Treated_HTN'] = df['Age'] * df['Treated_HTN']
    df['Age_DM'] = df['Age'] * df['DM']
    df['Age_AF'] = df['Age'] * df['AF']
            
    df = prevent_constant_variable(df, startDate, endDate)

    multivariate_metrics_df = get_metrics_recal_methods(df, percent_type_2_diabetes, recalthreshold, model_name='QRISK_datasim')
    undetected, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7 = run_recalibration_tests(df, startDate, undetected, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, recalthreshold)
    
    
    ########################################### Bayesian Testing #######################################
    bay_model = BayesianModel(input_data=df, priors={"Intercept": (intercept, 0.25),
                                                "White": (coefs['White'], 0.25), 
                                                "Indian": (coefs['Indian'], 0.25),
                                                "Pakistani": (coefs['Pakistani'], 0.25),
                                                "Bangladeshi": (coefs['Bangladeshi'], 0.25),
                                                "Other_Asian": (coefs['Other_Asian'], 0.25),
                                                "Black_Caribbean": (coefs['Black_Caribbean'], 0.25),
                                                "Black_African": (coefs['Black_African'], 0.25),
                                                "Chinese": (coefs['Chinese'], 0.25),
                                                "Other": (coefs['Other'], 0.25),
                                                "Age": (coefs['Age'], 0.25),
                                                "BMI": (coefs['BMI'], 0.25),
                                                "Townsend": (coefs['Townsend'], 0.25),
                                                "SBP": (coefs['SBP'], 0.25),
                                                "CholHDL_ratio": (coefs['CholHDL_ratio'], 0.25),
                                                "Family_CHD": (coefs['Family_CHD'], 0.25),
                                                "Current_smoker": (coefs['Current_smoker'], 0.25),
                                                "Treated_HTN": (coefs['Treated_HTN'], 0.25),
                                                "DM": (coefs['DM'], 0.25),
                                                "RA": (coefs['RA'], 0.25),
                                                "AF": (coefs['AF'], 0.25),
                                                "Renal_disease": (coefs['Renal_disease'], 0.25),
                                                "Age_BMI": (coefs['Age_BMI'], 0.25),
                                                "Age_Townsend": (coefs['Age_Townsend'], 0.25),
                                                "Age_SBP": (coefs['Age_SBP'], 0.25),
                                                "Age_Family_CHD": (coefs['Age_Family_CHD'], 0.25),
                                                "Age_Smoking": (coefs['Age_Smoking'], 0.25),
                                                "Age_Treated_HTN": (coefs['Age_Treated_HTN'], 0.25),
                                                "Age_DM": (coefs['Age_DM'], 0.25),
                                                "Age_AF": (coefs['Age_AF'], 0.25)}, 
                                                cores=1, verbose=False,
                                                model_formula="outcome ~ White + Indian + Pakistani + Bangladeshi + Other_Asian + Black_Caribbean + Black_African + Chinese + Other + Age + BMI + Townsend + SBP + CholHDL_ratio + Family_CHD + Current_smoker + Treated_HTN + DM + RA + AF + Renal_disease + Age_BMI + Age_Townsend + Age_SBP + Age_Family_CHD + Age_Smoking + Age_Treated_HTN + Age_DM + Age_AF")
    bay_model.trigger = TimeframeTrigger(model=bay_model, updateTimestep='month', dataStart=startDate, dataEnd=endDate)
    mytest = PREDICT(data=df, model=bay_model, startDate='min', endDate='max', timestep='month')
    mytest.addLogHook(Accuracy(bay_model))
    mytest.addLogHook(AUROC(bay_model))
    mytest.addLogHook(Precision(bay_model))
    mytest.addLogHook(CalibrationSlope(bay_model))
    mytest.addLogHook(CITL(bay_model))
    mytest.addLogHook(OE(bay_model))
    mytest.addLogHook(AUPRC(bay_model))
    mytest.addLogHook(TrackBayesianCoefs(bay_model))
    mytest.run()
    log = mytest.getLog()

    if "BayesianCoefficients" in log:
        bayes_dict["BayesianCoefficients"].update(log["BayesianCoefficients"])
    
    ttd = find_bayes_coef_change(bayes_dict["BayesianCoefficients"], detectDate=startDate, undetected=undetected, threshold=0.1)
    bayesian_ttd.append(ttd)

    bayes_metrics = pd.DataFrame({'Time': list(log["Accuracy"].keys()), 'Accuracy': list(log["Accuracy"].values()), 'AUROC': list(log["AUROC"].values()), 'Precision': list(log["Precision"].values()), 'CalibrationSlope': list(log["CalibrationSlope"].values()), 'CITL': list(log["CITL"].values()), 'OE': list(log["O/E"].values()), 'AUPRC': list(log["AUPRC"].values()), 'impact_or_prev': list([str(percent_type_2_diabetes)] * len(log["Accuracy"])), 'Method':list(['Bayesian'] * len(log["Accuracy"]))})
    
    ########################################### Save Metrics #######################################

    # concatenate all the dataframes into one
    multivariate_metrics_df = pd.concat([multivariate_metrics_df, bayes_metrics], ignore_index=True)
    multivariate_metrics_df["Data_Type"] = "Multivariate Simulation"
    
    multivariate_metrics_df.to_csv('performance_metrics.csv', mode='a', header=False, index=False)

    update_ttd_table(regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, bayesian_ttd, prev_increase, 'multivariate_ttd_tbl.csv')
    
    # Generate plots
    plot_prev_over_time(df, None, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, bayesian_ttd, 'multivariate_'+str(prev_increase))
    BayesianCoefsPlot(bayes_dict, 'multivariate_'+str(prev_increase))

plot_time_to_detect('multivariate_ttd_tbl.csv', 'multivariate')

Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1


Model formula is set to:  outcome ~ White + Indian + Pakistani + Bangladeshi + Other_Asian + Black_Caribbean + Black_African + Chinese + Other + Age + BMI + Townsend + SBP + CholHDL_ratio + Family_CHD + Current_smoker + Treated_HTN + DM + RA + AF + Renal_disease + Age_BMI + Age_Townsend + Age_SBP + Age_Family_CHD + Age_Smoking + Age_Treated_HTN + Age_DM + Age_AF


Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 35 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 35 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 64 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 64 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 56 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 56 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 49 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 49 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 42 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 42 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 47 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 47 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 40 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 40 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 46 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 46 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 46 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 46 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 55 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 55 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 33 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 33 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 35 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 35 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 41 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 41 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 42 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 42 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 41 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 41 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 47 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 47 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 42 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 42 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 34 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 34 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 55 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 55 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 50 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 50 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 40 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 40 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 49 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 49 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 61 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 61 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 47 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 47 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 48 seconds.
INFO:pymc.sampling.mcmc:Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 48 seconds.
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
INFO:pymc.sampling.mcmc:Initializing NUTS using jitter+adapt_diag...
Sequential sampling (4 chains in 1 job)
INFO:pymc.sampling.mcmc:Sequential sampling (4 chains in 1 job)
NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other, Age, BMI, Townsend, SBP, CholHDL_ratio, Family_CHD, Current_smoker, Treated_HTN, DM, RA, AF, Renal_disease, Age_BMI, Age_Townsend, Age_SBP, Age_Family_CHD, Age_Smoking, Age_Treated_HTN, Age_DM, Age_AF]
INFO:pymc.sampling.mcmc:NUTS: [Intercept, White, Indian, Pakistani, Bangladeshi, Other_Asian, Black_Caribbean, Black_African, Chinese, Other,

Output()

Sampling 2 chains for 250 tune and 1_000 draw iterations (500 + 2_000 draws total) took 38 seconds.
INFO:pymc.sampling.mcmc:Sampling 2 chains for 250 tune and 1_000 draw iterations (500 + 2_000 draws total) took 38 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics
INFO:pymc.stats.convergence:We recommend running at least 4 chains for robust computation of convergence diagnostics
Modeling the probability that outcome==1
INFO:bambi:Modeling the probability that outcome==1


KeyboardInterrupt: 

## Plot results

In [16]:
metrics_df = pd.read_csv('performance_metrics.csv')
metrics_df["Time"] = pd.to_datetime(metrics_df["Time"])


sns.set(font_scale=1.2)

for data_type in metrics_df["Data_Type"].unique():
    
    if data_type == "COVID Simulation":
        switchDate = pd.to_datetime('01-04-2020', dayfirst=True)
        metric_choice = "Accuracy"
        plt.figure(figsize=(14, 7))
        data_subset = metrics_df[metrics_df["Data_Type"] == data_type]
        # change method static threshold to static threshold (0.86 AUROC)
        data_subset["Method"] = data_subset["Method"].replace({"Static Threshold": "Static Threshold (0.86 AUROC)"})
        sns.set_style("whitegrid")
        sns.color_palette("colorblind")
        sns.lineplot(
            data=data_subset,
            x="Time",
            y=metric_choice,
            hue="Method",
            ci=None, # could remove shaded regions with `ci=None` if it's too busy
            err_kws={"alpha": .1}, # change transparency of shaded areas
            style="Method" # could use `markers=True, dashes=False` to differentiate lines further
        )

        # get the dates when recalibration happened for each method
        # first row from the ttd table
        ttd = pd.read_csv('covid_ttd_tbl.csv')
        regular_test = ttd['regular_ttd'].values[0]
        static_ttd = ttd['static_ttd'].values[0]
        spc3_ttd = ttd['spc_ttd3'].values[0]
        spc5_ttd = ttd['spc_ttd5'].values[0]
        spc7_ttd = ttd['spc_ttd7'].values[0]
        bayesian_ttd = ttd['bayesian_ttd'].values[0]

        ax = plt.gca()
        handles, labels = ax.get_legend_handles_labels()
        method_colors = {label: handle.get_color() for handle, label in zip(handles, labels)}
        

        # vertical lines to show when the updates happened
        #plt.axvline(x=switchDate, color='black', linestyle='-', alpha=0.7, label=f'COVID-19 Pandemic Start: {switchDate.date()}')
        
        plt.scatter(
            x=[switchDate],
            y=[min(data_subset[metric_choice])],  # Place the marker at the bottom of the y-axis
            color='black',
            marker='v',       # 'v' is a downward-pointing triangle
            s=100,             # size of the marker
            label=f'COVID-19 Pandemic Start: {switchDate.date()}'
        )


        plt.axvline(x=switchDate + timedelta(days=regular_test), color=method_colors.get("Regular Testing", "gray"), linestyle='--', alpha=0.7)#, label=f'Regular Testing Recalibration: {int(regular_test)} days')
        plt.axvline(x=switchDate + timedelta(days=static_ttd), color=method_colors.get("Static Threshold (0.86 AUROC)", "gray"), linestyle='--', alpha=0.7)#,, label=f'Static Threshold Recalibration: {int(static_ttd)} days')
        plt.axvline(x=switchDate + timedelta(days=spc3_ttd), color=method_colors.get("SPC3", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC3 Recalibration: {int(spc3_ttd)} days')
        plt.axvline(x=switchDate + timedelta(days=spc5_ttd), color=method_colors.get("SPC5", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC5 Recalibration: {int(spc5_ttd)} days')
        plt.axvline(x=switchDate + timedelta(days=spc7_ttd), color=method_colors.get("SPC7", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC7 Recalibration: {int(spc7_ttd)} days')
        plt.axvline(x=switchDate + timedelta(days=bayesian_ttd), color=method_colors.get("Bayesian", "gray"), linestyle='--', alpha=0.7)#,, label=f'Bayesian Refit: {int(bayesian_ttd)} days')

        plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig(f"../docs/images/performance_comparison/{data_type}_{metric_choice}_with_std_bounds.png", dpi=600, bbox_inches='tight')
        plt.show()
    
    if data_type == "Slow Change Simulation":
        metric_choice = "AUPRC"
        plt.figure(figsize=(14, 7))
        data_subset = metrics_df[metrics_df["Data_Type"] == data_type]
        data_subset["Method"] = data_subset["Method"].replace({"Static Threshold": "Static Threshold (0.851 AUROC)"})
        sns.set_style("whitegrid")
        sns.lineplot(
            data=data_subset,
            x="Time",
            y=metric_choice,
            hue="Method",
            ci=None, # could remove shaded regions with `ci=None` if it's too busy
            err_kws={"alpha": .1}, # change transparency of shaded areas
            style="Method" # could use `markers=True, dashes=False` to differentiate lines further
        )

        # get the dates when recalibration happened for each method
        # first row from the ttd table
        ttd = pd.read_csv('input_prev_ttd_tbl.csv')
        regular_test = ttd['regular_ttd'].values[0]
        static_ttd = ttd['static_ttd'].values[0]
        spc3_ttd = ttd['spc_ttd3'].values[0]
        spc5_ttd = ttd['spc_ttd5'].values[0]
        spc7_ttd = ttd['spc_ttd7'].values[0]
        bayesian_ttd = ttd['bayesian_ttd'].values[0]

        ax = plt.gca()
        handles, labels = ax.get_legend_handles_labels()
        method_colors = {label: handle.get_color() for handle, label in zip(handles, labels)}

        plt.axvline(x=startDate + timedelta(days=int(regular_test)), color=method_colors.get("Regular Testing", "gray"), linestyle='--', alpha=0.7)#, label=f'Regular Testing Recalibration: {int(regular_test)} days')
        plt.axvline(x=startDate + timedelta(days=int(static_ttd)), color=method_colors.get("Static Threshold (0.86 AUROC)", "gray"), linestyle='--', alpha=0.7)#,, label=f'Static Threshold Recalibration: {int(static_ttd)} days')
        if not pd.isna(spc3_ttd):
            plt.axvline(x=startDate + timedelta(days=int(spc3_ttd)), color=method_colors.get("SPC3", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC3 Recalibration: {int(spc3_ttd)} days')
        if not pd.isna(spc5_ttd):
            plt.axvline(x=startDate + timedelta(days=int(spc5_ttd)), color=method_colors.get("SPC5", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC5 Recalibration: {int(spc5_ttd)} days')
        if not pd.isna(spc7_ttd):
            plt.axvline(x=startDate + timedelta(days=int(spc7_ttd)), color=method_colors.get("SPC7", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC7 Recalibration: {int(spc7_ttd)} days')
        plt.axvline(x=startDate + timedelta(days=int(bayesian_ttd)), color=method_colors.get("Bayesian", "gray"), linestyle='--', alpha=0.7)#,, label=f'Bayesian Refit: {int(bayesian_ttd)} days')

        plt.xticks(rotation=45)
        plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        plt.tight_layout()
        plt.savefig(f"../docs/images/performance_comparison/{data_type}_{metric_choice}_with_std_bounds.png", dpi=600, bbox_inches='tight')
        plt.show()

    if data_type == "Outcome Prevalence Simulation" or data_type == "Multivariate Simulation":
        metric_choice = "CalibrationSlope"
        plt.figure(figsize=(14, 7))
        data_subset = metrics_df[metrics_df["Data_Type"] == data_type]
        if data_type == "Outcome Prevalence Simulation":
            data_subset["Method"] = data_subset["Method"].replace({"Static Threshold": "Static Threshold (0.77 AUROC)"})
        elif data_type == "Multivariate Simulation":
            data_subset["Method"] = data_subset["Method"].replace({"Static Threshold": "Static Threshold (0.811 AUROC)"})
        sns.set_style("whitegrid")
        sns.lineplot(
            data=data_subset,
            x="Time",
            y=metric_choice,
            hue="Method",
            ci=None, # could remove shaded regions with `ci=None` if it's too busy
            err_kws={"alpha": .1}, # change transparency of shaded areas
            style="Method" # could use `markers=True, dashes=False` to differentiate lines further
        )

        # get the dates when recalibration happened for each method
        # first row from the ttd table
        if data_type == "Outcome Prevalence Simulation":
            ttd = pd.read_csv('output_prev_ttd_tbl.csv')
        if data_type == "Multivariate Simulation":
            ttd = pd.read_csv('multivariate_ttd_tbl.csv')

        regular_test = ttd['regular_ttd'].values[0]
        static_ttd = ttd['static_ttd'].values[0]
        spc3_ttd = ttd['spc_ttd3'].values[0]
        spc5_ttd = ttd['spc_ttd5'].values[0]
        spc7_ttd = ttd['spc_ttd7'].values[0]
        bayesian_ttd = ttd['bayesian_ttd'].values[0]

        ax = plt.gca()
        handles, labels = ax.get_legend_handles_labels()
        method_colors = {label: handle.get_color() for handle, label in zip(handles, labels)}

        plt.axvline(x=startDate + timedelta(days=int(regular_test)), color=method_colors.get("Regular Testing", "gray"), linestyle='--', alpha=0.7)#, label=f'Regular Testing Recalibration: {int(regular_test)} days')
        plt.axvline(x=startDate + timedelta(days=int(static_ttd)), color=method_colors.get("Static Threshold (0.86 AUROC)", "gray"), linestyle='--', alpha=0.7)#,, label=f'Static Threshold Recalibration: {int(static_ttd)} days')
        if not pd.isna(spc3_ttd):
            plt.axvline(x=startDate + timedelta(days=int(spc3_ttd)), color=method_colors.get("SPC3", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC3 Recalibration: {int(spc3_ttd)} days')
        if not pd.isna(spc5_ttd):
            plt.axvline(x=startDate + timedelta(days=int(spc5_ttd)), color=method_colors.get("SPC5", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC5 Recalibration: {int(spc5_ttd)} days')
        if not pd.isna(spc7_ttd):
            plt.axvline(x=startDate + timedelta(days=int(spc7_ttd)), color=method_colors.get("SPC7", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC7 Recalibration: {int(spc7_ttd)} days')
        plt.axvline(x=startDate + timedelta(days=int(bayesian_ttd)), color=method_colors.get("Bayesian", "gray"), linestyle='--', alpha=0.7)#,, label=f'Bayesian Refit: {int(bayesian_ttd)} days')

        plt.xticks(rotation=45)
        plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        plt.tight_layout()
        plt.savefig(f"../docs/images/performance_comparison/{data_type}_{metric_choice}_with_std_bounds.png", dpi=600, bbox_inches='tight')
        plt.show()

        metric_choice = "OE"
        plt.figure(figsize=(14, 7))
        data_subset = metrics_df[metrics_df["Data_Type"] == data_type]
        if data_type == "Outcome Prevalence Simulation":
            data_subset["Method"] = data_subset["Method"].replace({"Static Threshold": "Static Threshold (0.77 AUROC)"})
        elif data_type == "Multivariate Simulation":
            data_subset["Method"] = data_subset["Method"].replace({"Static Threshold": "Static Threshold (0.811 AUROC)"})
        sns.set_style("whitegrid")
        sns.lineplot(
            data=data_subset,
            x="Time",
            y=metric_choice,
            hue="Method",
            ci=None, # could remove shaded regions with `ci=None` if it's too busy
            err_kws={"alpha": .1}, # change transparency of shaded areas
            style="Method" # could use `markers=True, dashes=False` to differentiate lines further
        )

        # get the dates when recalibration happened for each method
        # first row from the ttd table
        if data_type == "Outcome Prevalence Simulation":
            ttd = pd.read_csv('output_prev_ttd_tbl.csv')
        if data_type == "Multivariate Simulation":
            ttd = pd.read_csv('multivariate_ttd_tbl.csv')

        regular_test = ttd['regular_ttd'].values[0]
        static_ttd = ttd['static_ttd'].values[0]
        spc3_ttd = ttd['spc_ttd3'].values[0]
        spc5_ttd = ttd['spc_ttd5'].values[0]
        spc7_ttd = ttd['spc_ttd7'].values[0]
        bayesian_ttd = ttd['bayesian_ttd'].values[0]

        ax = plt.gca()
        handles, labels = ax.get_legend_handles_labels()
        method_colors = {label: handle.get_color() for handle, label in zip(handles, labels)}

        plt.axvline(x=startDate + timedelta(days=int(regular_test)), color=method_colors.get("Regular Testing", "gray"), linestyle='--', alpha=0.7)#, label=f'Regular Testing Recalibration: {int(regular_test)} days')
        plt.axvline(x=startDate + timedelta(days=int(static_ttd)), color=method_colors.get("Static Threshold (0.86 AUROC)", "gray"), linestyle='--', alpha=0.7)#,, label=f'Static Threshold Recalibration: {int(static_ttd)} days')
        if not pd.isna(spc3_ttd):
            plt.axvline(x=startDate + timedelta(days=int(spc3_ttd)), color=method_colors.get("SPC3", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC3 Recalibration: {int(spc3_ttd)} days')
        if not pd.isna(spc5_ttd):
            plt.axvline(x=startDate + timedelta(days=int(spc5_ttd)), color=method_colors.get("SPC5", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC5 Recalibration: {int(spc5_ttd)} days')
        if not pd.isna(spc7_ttd):
            plt.axvline(x=startDate + timedelta(days=int(spc7_ttd)), color=method_colors.get("SPC7", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC7 Recalibration: {int(spc7_ttd)} days')
        plt.axvline(x=startDate + timedelta(days=int(bayesian_ttd)), color=method_colors.get("Bayesian", "gray"), linestyle='--', alpha=0.7)#,, label=f'Bayesian Refit: {int(bayesian_ttd)} days')

        plt.xticks(rotation=45)
        plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        plt.tight_layout()
        plt.savefig(f"../docs/images/performance_comparison/{data_type}_{metric_choice}_with_std_bounds.png", dpi=600, bbox_inches='tight')
        plt.show()

        metric_choice = "CITL"
        plt.figure(figsize=(14, 7))
        data_subset = metrics_df[metrics_df["Data_Type"] == data_type]
        if data_type == "Outcome Prevalence Simulation":
            data_subset["Method"] = data_subset["Method"].replace({"Static Threshold": "Static Threshold (0.77 AUROC)"})
        elif data_type == "Multivariate Simulation":
            data_subset["Method"] = data_subset["Method"].replace({"Static Threshold": "Static Threshold (0.811 AUROC)"})
        sns.set_style("whitegrid")
        sns.lineplot(
            data=data_subset,
            x="Time",
            y=metric_choice,
            hue="Method",
            ci=None, # could remove shaded regions with `ci=None` if it's too busy
            err_kws={"alpha": .1}, # change transparency of shaded areas
            style="Method" # could use `markers=True, dashes=False` to differentiate lines further
        )

        # get the dates when recalibration happened for each method
        # first row from the ttd table
        if data_type == "Outcome Prevalence Simulation":
            ttd = pd.read_csv('output_prev_ttd_tbl.csv')
        if data_type == "Multivariate Simulation":
            ttd = pd.read_csv('multivariate_ttd_tbl.csv')
        regular_test = ttd['regular_ttd'].values[0]
        static_ttd = ttd['static_ttd'].values[0]
        spc3_ttd = ttd['spc_ttd3'].values[0]
        spc5_ttd = ttd['spc_ttd5'].values[0]
        spc7_ttd = ttd['spc_ttd7'].values[0]
        bayesian_ttd = ttd['bayesian_ttd'].values[0]

        ax = plt.gca()
        handles, labels = ax.get_legend_handles_labels()
        method_colors = {label: handle.get_color() for handle, label in zip(handles, labels)}

        plt.axvline(x=startDate + timedelta(days=int(regular_test)), color=method_colors.get("Regular Testing", "gray"), linestyle='--', alpha=0.7)#, label=f'Regular Testing Recalibration: {int(regular_test)} days')
        plt.axvline(x=startDate + timedelta(days=int(static_ttd)), color=method_colors.get("Static Threshold (0.86 AUROC)", "gray"), linestyle='--', alpha=0.7)#,, label=f'Static Threshold Recalibration: {int(static_ttd)} days')
        if not pd.isna(spc3_ttd):
            plt.axvline(x=startDate + timedelta(days=int(spc3_ttd)), color=method_colors.get("SPC3", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC3 Recalibration: {int(spc3_ttd)} days')
        if not pd.isna(spc5_ttd):
            plt.axvline(x=startDate + timedelta(days=int(spc5_ttd)), color=method_colors.get("SPC5", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC5 Recalibration: {int(spc5_ttd)} days')
        if not pd.isna(spc7_ttd):
            plt.axvline(x=startDate + timedelta(days=int(spc7_ttd)), color=method_colors.get("SPC7", "gray"), linestyle='--', alpha=0.7)#,, label=f'SPC7 Recalibration: {int(spc7_ttd)} days')
        plt.axvline(x=startDate + timedelta(days=int(bayesian_ttd)), color=method_colors.get("Bayesian", "gray"), linestyle='--', alpha=0.7)#,, label=f'Bayesian Refit: {int(bayesian_ttd)} days')

        plt.xticks(rotation=45)
        plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        plt.tight_layout()
        plt.savefig(f"../docs/images/performance_comparison/{data_type}_{metric_choice}_with_std_bounds.png", dpi=600, bbox_inches='tight')
        plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'multivariate_ttd_tbl.csv'

In [None]:
# Plot each of the multivariate plots for each impact_or_prev value
for impact_or_prev in metrics_df[metrics_df["Data_Type"] == "Multivariate Simulation"]["impact_or_prev"].unique():
    metric_choice = "CalibrationSlope"
    plt.figure(figsize=(12, 6))
    data_subset = metrics_df[(metrics_df["Data_Type"] == "Multivariate Simulation") & (metrics_df["impact_or_prev"] == impact_or_prev)]

    sns.lineplot(
        data=data_subset,
        x="Time",
        y=metric_choice,
        hue="Method",
        ci=None
    )

    plt.xticks(rotation=45)
    plt.tight_layout()
    #plt.savefig(f"../docs/images/performance_comparison/Multivariate_Simulation_{impact_or_prev}_{metric_choice}_with_std_bounds.png", dpi=600, bbox_inches='tight')
    plt.show()

    metric_choice = "OE"
    plt.figure(figsize=(12, 6))
    data_subset = metrics_df[(metrics_df["Data_Type"] == "Multivariate Simulation") & (metrics_df["impact_or_prev"] == impact_or_prev)]

    sns.lineplot(
        data=data_subset,
        x="Time",
        y=metric_choice,
        hue="Method",
        ci=None
    )

    plt.xticks(rotation=45)
    plt.tight_layout()
    #plt.savefig(f"../docs/images/performance_comparison/Multivariate_Simulation_{impact_or_prev}_{metric_choice}_with_std_bounds.png", dpi=600, bbox_inches='tight')
    plt.show()

    metric_choice = "CITL"
    plt.figure(figsize=(12, 6))
    data_subset = metrics_df[(metrics_df["Data_Type"] == "Multivariate Simulation") & (metrics_df["impact_or_prev"] == impact_or_prev)]

    sns.lineplot(
        data=data_subset,
        x="Time",
        y=metric_choice,
        hue="Method",
        ci=None
    )


    plt.xticks(rotation=45)
    plt.tight_layout()
    #plt.savefig(f"../docs/images/performance_comparison/Multivariate_Simulation_{impact_or_prev}_{metric_choice}_with_std_bounds.png", dpi=600, bbox_inches='tight')
    plt.show()

In [None]:
def count_best_methods(metrics_df, metrics=["Accuracy", "AUROC", "Precision", "CalibrationSlope", "CITL", "OE", "AUPRC"]):
    """
    Finds the best method for each time point based on the given performance metrics.

    Parameters:
    metrics_df (DataFrame): The dataset containing performance metrics.
    metrics (list): List of metric column names to evaluate.

    Returns:
    dict: A dictionary containing the count of times each method outperformed others for each metric.
    """
    results = {}

    for metric in metrics:
        if metric == "OE" or metric == "CalbrationSlope":
            # For CalibrationSlope and O/E ratio, assume closer to 1 is better
            # Find the method with the closest CalibrationSlope to 1 for each time point
            closest_to_one = metrics_df.loc[metrics_df.groupby("Time")[metric].apply(lambda x: (x - 1).abs().idxmin())]["Method"]
            method_counts = closest_to_one.value_counts()
        elif metric == "CITL":
            # For CITL, assume closer to 0 is better
            closest_to_zero = metrics_df.loc[metrics_df.groupby("Time")[metric].apply(lambda x: x.abs().idxmin())]["Method"]
            method_counts = closest_to_zero.value_counts()
        else:
            best_methods = metrics_df.loc[metrics_df.groupby("Time")[metric].idxmax()]["Method"]
            method_counts = best_methods.value_counts()
        results[metric] = method_counts
        print(f"\nNumber of times each method outperformed others ({metric}):")
        print(method_counts)

    return results

# count best methods for all simulations
_ = count_best_methods(metrics_df)

In [None]:
print("############ COVID Simulation Metrics ############")
covid_metrics_df = metrics_df[metrics_df["Data_Type"]=="COVID Simulation"]
# Count best method for COVID simulation
_ = count_best_methods(covid_metrics_df)
# Compute min and max values for Accuracy, AUROC, and Precision for each method
print("\nMinimum and Maximum Metrics for COVID Simulation:")
min_max_metrics = covid_metrics_df.groupby("Method")[["Accuracy", "AUROC", "Precision", "CalibrationSlope", "CITL", "OE", "AUPRC"]].agg(["min", "mean", "std", "max"])
print(min_max_metrics)
# save dataframe to csv
min_max_metrics.to_csv('covid_metrics.csv', index=False)

print("############ Multivariate Simulation Metrics ############")
multivariate_metrics_df = metrics_df[metrics_df["Data_Type"]=="Multivariate Simulation"]
_ = count_best_methods(multivariate_metrics_df)
print("\nMinimum and Maximum Metrics for Multivariate Simulation:")
min_max_metrics = multivariate_metrics_df.groupby("Method")[["Accuracy", "AUROC", "Precision", "CalibrationSlope", "CITL", "OE", "AUPRC"]].agg(["min", "mean", "std", "max"])
print(min_max_metrics)
min_max_metrics.to_csv('multivariate_metrics.csv', index=False)

print("############ Slow Change Simulation Metrics ############")
slow_metrics_df = metrics_df[metrics_df["Data_Type"]=="Slow Change Simulation"]
_ = count_best_methods(slow_metrics_df)
print("\nMinimum and Maximum Metrics for Slow Change Simulation:")
min_max_metrics = slow_metrics_df.groupby("Method")[["Accuracy", "AUROC", "Precision", "CalibrationSlope", "CITL", "OE", "AUPRC"]].agg(["min", "mean", "std", "max"])
print(min_max_metrics)
min_max_metrics.to_csv('slow_change_metrics.csv', index=False)

print("############ Outcome Prevalence Simulation Metrics ############")
outcome_prev_metrics_df = metrics_df[metrics_df["Data_Type"]=="Outcome Prevalence Simulation"]
_ = count_best_methods(outcome_prev_metrics_df)
print("\nMinimum and Maximum Metrics for Outcome Prevalence Simulation:")
min_max_metrics = outcome_prev_metrics_df.groupby("Method")[["Accuracy", "AUROC", "Precision", "CalibrationSlope", "CITL", "OE", "AUPRC"]].agg(["min", "mean", "std", "max"])
print(min_max_metrics)
min_max_metrics.to_csv('outcome_prev_metrics.csv', index=False)

In [None]:
# Compute average accuracy, AUROC, and precision per method
method_avg_performance = metrics_df.groupby("Method")[["Accuracy", "AUROC", "Precision"]].mean()
method_sd_performance = metrics_df.groupby("Method")[["Accuracy", "AUROC", "Precision"]].std()

# Rank methods based on average accuracy
method_avg_performance["Rank"] = method_avg_performance["Accuracy"].rank(method="dense", ascending=False)

# Print results
print("Method Rankings Based on Average Accuracy:")
print(method_avg_performance.sort_values("Rank"))

print("Standard Deviation of Performance Metrics:")
print(method_sd_performance)
