In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
sys.path.append("../")
# NOTE: the project star imports stay ahead of the explicit imports below so that the
# explicit names (np, pd, datetime, timedelta, ...) win if a star import exports a clash.
from PREDICT import PREDICT
from PREDICT.Models import *
from PREDICT.Metrics import *
from PREDICT.Triggers import *
from PREDICT.Plots import *
from Comparison.Detect_Functions import *
import numpy as np
import pandas as pd
from datetime import timedelta
import datetime
import statistics
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

%env PYTENSOR_FLAGS=exception_verbosity=high#,optimizer=fast_compile

env: PYTENSOR_FLAGS=exception_verbosity=high#,optimizer=fast_compile


In [2]:
# Folder that collects every artefact (CSVs, figures) produced by this notebook.
resultsloc = "./Results/simulation/fast_change"
os.makedirs(resultsloc, exist_ok=True)

# Later cells append metric rows to this CSV with header=False, so the header row is
# written exactly once: the first time the file does not exist yet.
metrics_csv = os.path.join(resultsloc, 'performance_metrics.csv')
if not os.path.exists(metrics_csv):
    metric_columns = [
        'Time', 'Accuracy', 'AUROC', 'Precision', 'CalibrationSlope', 'CITL',
        'OE', 'AUPRC', 'F1Score', 'impact_or_prev', 'Method', 'Data_Type',
    ]
    pd.DataFrame(columns=metric_columns).to_csv(metrics_csv, index=False)

In [None]:
# Number of simulated patients generated for every daily timestep.
num_patients = 100

# First and last day of the simulated period, written day-first (DD-MM-YYYY).
startDate, endDate = (
    pd.to_datetime(day_first_text, dayfirst=True)
    for day_first_text in ('01-06-2019', '31-12-2021')
)

In [None]:
def _add_metric_hooks(predict_run, scored_model):
    """Register the eight performance-metric log hooks on ``predict_run``.

    Every hook is built from the same ``scored_model`` so a single run can never mix
    metrics computed from different models.
    """
    for hook_cls in (Accuracy, AUROC, Precision, CalibrationSlope, CITL, OE, AUPRC, F1Score):
        predict_run.addLogHook(hook_cls(scored_model))


def _metrics_frame(log, impact, method):
    """Flatten a PREDICT log into one row per logged timestep.

    Args:
        log: mapping returned by ``getLog()``; must contain the keys "Accuracy", "AUROC",
            "Precision", "CalibrationSlope", "CITL", "O/E", "AUPRC" and "F1score", each
            mapping a timestep to a value.
        impact: simulated impact size, stored as a string in ``impact_or_prev``.
        method: label written to the ``Method`` column.

    Returns:
        DataFrame whose column order matches the header of performance_metrics.csv
        (minus ``Data_Type``, which the caller adds last).
    """
    n_steps = len(log["Accuracy"])
    return pd.DataFrame({
        'Time': list(log["Accuracy"].keys()),
        'Accuracy': list(log["Accuracy"].values()),
        'AUROC': list(log["AUROC"].values()),
        'Precision': list(log["Precision"].values()),
        'CalibrationSlope': list(log["CalibrationSlope"].values()),
        'CITL': list(log["CITL"].values()),
        'OE': list(log["O/E"].values()),
        'AUPRC': list(log["AUPRC"].values()),
        'F1Score': list(log["F1score"].values()),
        'impact_or_prev': [str(impact)] * n_steps,
        'Method': [method] * n_steps,
    })


# Full sweep of impact sizes (added to the linear predictor during the COVID periods):
# [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 2.0, 2.5, 3.0]
# Only 0.1 is enabled for this run.
custom_impacts = [0.1]
switchDateStrings = ['01-04-2020'] # Keep this as just one switchDate as other methods only look at one startDate/deployment date
# Per-method counters handed to the detection helpers.
undetected = {"Static Threshold": 0, "Regular Testing": 0, "SPC3": 0, "SPC5": 0, "SPC7": 0, "Bayesian": 0}

# Predictor effect sizes; their natural logs are the coefficients of the linear predictor.
hr_age = 0.5
hr_ldh = 9.8
hr_comorbidity = 3.9

log_age = np.log(hr_age)
log_ldh = np.log(hr_ldh)
log_comorbidity = np.log(hr_comorbidity)

# Accumulates the Bayesian coefficient log across runs for plotting.
bayes_dict = {"BayesianCoefficients": {}}

# NOTE(review): no random seed is set in the visible cells, so every run simulates a
# different dataset - seed numpy in the configuration cell if reproducibility is needed.
for switchDateString in switchDateStrings:
    for custom_impact in custom_impacts:
        # Time-to-detect results, one list per detection method, reset for each impact.
        regular_ttd = []
        static_ttd = []
        spc_ttd3 = []
        spc_ttd5 = []
        spc_ttd7 = []
        bayesian_ttd = []
        mydict = {
            'date': [],
            'outcome': [],
            'prediction': [],
            'age': [],
            'sex': [],
            'comorbidity': [],
            'ldh_high': [],
        }

        # Define date range and COVID shock periods
        switchDate = pd.to_datetime(switchDateString, dayfirst=True)  # COVID starts spreading
        switchDate2 = pd.to_datetime('01-06-2020', dayfirst=True)  # Peak of the pandemic
        recoveryDate = pd.to_datetime('01-06-2021', dayfirst=True)  # Start of recovery phase
        numdays = (endDate - startDate).days
        switchDays = (switchDate - startDate).days
        switch2Days = (switchDate2 - startDate).days
        recoveryDays = (recoveryDate - startDate).days

        for i in range(numdays):
            # `timedelta` is imported explicitly at the top of the notebook; the previous
            # `dt.timedelta` relied on a `dt` alias that is never imported here.
            curday = startDate + timedelta(days=i)

            age = (np.random.normal(44, 16.3, num_patients) - 44) / 16.3  # Mean age 44 years, std 16.3
            sex = np.random.binomial(1, 0.562, num_patients) # 56.2% are male
            comorbidity = np.random.binomial(1, 0.3, num_patients)  # 30% have comorbidities
            ldh_high = np.random.binomial(1, 0.15, num_patients)  # 15% have LDH >500 U/L
            epsilon = np.random.normal(0, 0.2, num_patients) # Simulate error term (mean=0, std=0.2)

            # Calculate baseline log-odds
            # sex influence 1.2 due to not being provided in the paper
            lp = -1.5 + log_age * age + log_ldh * ldh_high + log_comorbidity * comorbidity + 1.2 * sex + epsilon
            # Predictions come from the baseline log-odds, i.e. before any COVID shift
            # is applied, so they drift away from the simulated outcomes below.
            curpredictions = 1 / (1 + np.exp(-lp))  # Convert to probability

            # Simulate COVID effects
            if switchDays <= i < switch2Days:
                lp += custom_impact  # Initial impact of COVID
            elif switch2Days <= i < recoveryDays:
                lp += custom_impact + 0.5  # Peak of the pandemic
            elif i >= recoveryDays:
                lp -= 1.0  # Recovery period - improved health outcomes

            # Generate outcomes
            curoutcomes = np.random.binomial(1, 1 / (1 + np.exp(-lp)))  # Simulate COVID events

            # Append to dictionary
            mydict['date'].extend([curday] * num_patients)
            mydict['outcome'].extend(curoutcomes)
            mydict['prediction'].extend(curpredictions)
            mydict['age'].extend(age)
            mydict['sex'].extend(sex)
            mydict['comorbidity'].extend(comorbidity)
            mydict['ldh_high'].extend(ldh_high)

        df = pd.DataFrame(mydict)

        ########################################### Baseline Testing #######################################
        model_name = 'COVID_datasim'
        model = EvaluatePredictions()
        mytest = PREDICT(data=df, model=model, startDate='min', endDate='max', timestep='month')
        _add_metric_hooks(mytest, model)
        mytest.run()
        log = mytest.getLog()

        baseline_metrics = _metrics_frame(log, custom_impact, 'Baseline')
        # Use baseline measure of OE score in time before switchDate to get CI
        pre_switch_oe = baseline_metrics[baseline_metrics['Time'] < switchDate]['OE']
        recalthreshold_lower = float(pre_switch_oe.quantile(0.025))
        recalthreshold_upper = float(pre_switch_oe.quantile(0.975))
        print(f"Using OE Threshold of {recalthreshold_lower} - {recalthreshold_upper} for impact {custom_impact}, mean was {pre_switch_oe.mean()}")

        ########################################### Save Metrics #######################################
        baseline_metrics["Data_Type"] = "COVID Simulation"
        # Appended without a header: column order must match the header row of the CSV.
        baseline_metrics.to_csv(os.path.join(resultsloc, 'performance_metrics.csv'), mode='a', header=False, index=False)

        ########################################### Test models ##########################################
        covid_metrics_df = get_metrics_recal_methods(df, custom_impact, recalthreshold_lower, recalthreshold_upper, model_name=model_name)
        undetected, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7 = run_recalibration_tests(
            df, switchDate, undetected,
            regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7,
            recalthreshold_lower, recalthreshold_upper,
        )

        ########################################### Bayesian Testing #######################################
        # Priors are (value, 0.1) pairs centred on the coefficients used to simulate the data.
        bay_model = BayesianModel(
            input_data=df,
            priors={
                "Intercept": (-1.5, 0.1),
                "age": (log_age, 0.1),
                "sex": (1.2, 0.1),
                "comorbidity": (log_comorbidity, 0.1),
                "ldh_high": (log_ldh, 0.1),
            },
            cores=2, verbose=False, draws=1000, tune=250, chains=4,
        )
        bay_model.trigger = TimeframeTrigger(model=bay_model, updateTimestep='month', dataStart=startDate, dataEnd=endDate)
        mytest = PREDICT(data=df, model=bay_model, startDate='min', endDate='max', timestep='month')
        # All hooks score `bay_model`. The F1 hook was previously built from the baseline
        # `model`, so the Bayesian F1 column did not describe the Bayesian model.
        _add_metric_hooks(mytest, bay_model)
        mytest.addLogHook(TrackBayesianCoefs(bay_model))
        mytest.run()
        log = mytest.getLog()

        if "BayesianCoefficients" in log:
            bayes_dict["BayesianCoefficients"].update(log["BayesianCoefficients"])
            print(log["BayesianCoefficients"])

        ttd = find_bayes_coef_change(bayes_dict["BayesianCoefficients"], detectDate=switchDate, undetected=undetected, threshold=0.1)
        print(ttd)
        bayesian_ttd.append(ttd)

        bayes_metrics = _metrics_frame(log, custom_impact, 'Bayesian')

        ########################################### Save Metrics #######################################

        # concatenate all the dataframes into one
        covid_metrics_df = pd.concat([covid_metrics_df, bayes_metrics], ignore_index=True)
        covid_metrics_df["Data_Type"] = "COVID Simulation"

        covid_metrics_df.to_csv(os.path.join(resultsloc, 'performance_metrics.csv'), mode='a', header=False, index=False)

        update_ttd_table(regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, bayesian_ttd, custom_impact, os.path.join(resultsloc, 'covid_ttd_tbl.csv'))

        # Figures for the current impact value; the impact is embedded in the plot name.
        BayesianCoefsPlot(bayes_dict, model_name=f"fast_change_impact_{custom_impact}", fileloc=resultsloc)
        plot_prev_over_time(df, switchDateStrings, regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, bayesian_ttd, f"fast_change_impact_{custom_impact}", fileloc=resultsloc)
        

Using F1 Threshold of 0.6769536855838226 for impact 0.1, mean was 0.7009820365386267


Modeling the probability that outcome==1


No model formula was provided, using standard linear model formula.
Model formula is set to:  outcome~age+sex+comorbidity+ldh_high


Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 37 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 44 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 37 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 36 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 47 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 50 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 57 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 49 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 55 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 50 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 48 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 55 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 63 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 65 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 68 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 120 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 77 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 68 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 63 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 56 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 69 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 86 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 69 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 49 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 48 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 37 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 46 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 48 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 30 seconds.
Modeling the probability that outcome==1
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [Intercept, age, sex, comorbidity, ldh_high]


Output()

Sampling 4 chains for 250 tune and 1_000 draw iterations (1_000 + 4_000 draws total) took 36 seconds.


{Timestamp('2019-07-01 00:00:00'): {'Intercept': (-1.5, 0.25), 'age': (-0.6931471805599453, 0.25), 'sex': (1.2, 0.25), 'comorbidity': (1.3609765531356006, 0.25), 'ldh_high': (2.2823823856765264, 0.25)}, Timestamp('2019-08-01 00:00:00'): {'Intercept': (-1.4751610555020744, 0.25), 'age': (-0.7277983911779777, 0.25), 'sex': (1.0291859487333723, 0.25), 'comorbidity': (1.1218518884422302, 0.25), 'ldh_high': (2.227777713301418, 0.25)}, Timestamp('2019-09-01 00:00:00'): {'Intercept': (-1.6535905175792671, 0.25), 'age': (-0.7024492756397098, 0.25), 'sex': (1.1969132572938237, 0.25), 'comorbidity': (1.3302118786711203, 0.25), 'ldh_high': (2.1180339227601515, 0.25)}, Timestamp('2019-10-01 00:00:00'): {'Intercept': (-1.6339701308600758, 0.25), 'age': (-0.7031237956812205, 0.25), 'sex': (1.2661242655485285, 0.25), 'comorbidity': (1.278940544286359, 0.25), 'ldh_high': (2.234015992583164, 0.25)}, Timestamp('2019-11-01 00:00:00'): {'Intercept': (-1.7293275430323716, 0.25), 'age': (-0.763341691365066,

In [5]:
# Tweak plot outputs: re-draw both figures from the variables left behind by the
# simulation loop (so they reflect the last impact value that was simulated).
plot_label = f"fast_change_impact_{custom_impact}"
BayesianCoefsPlot(bayes_dict, model_name=plot_label, fileloc=resultsloc)
plot_prev_over_time(
    df, switchDateStrings,
    regular_ttd, static_ttd, spc_ttd3, spc_ttd5, spc_ttd7, bayesian_ttd,
    plot_label,
    fileloc=resultsloc,
)