In [1]:
import sys
sys.path.append("../")
from PREDICT import PREDICT
from PREDICT.Models import *
from PREDICT.Metrics import *
from PREDICT.Triggers import *
from PREDICT.Plots import *
from Comparison.Detect_Functions import *
import numpy as np
import pandas as pd
from datetime import timedelta
import datetime
import statistics
import time

import warnings
warnings.filterwarnings('ignore')

## Comparing Computation Time for Each of the Methods

In this notebook, the computation times of four methods for repairing temporal drift are compared:

1) Regular model testing
2) Statistical process control (using the first 3 months of data)
3) Static threshold
4) Bayesian variable relative change



In [2]:
# Simulation window, written day-first: 1 June 2019 through 31 December 2021.
startDate, endDate = (
    pd.to_datetime(day_first_date, dayfirst=True)
    for day_first_date in ('01-06-2019', '31-12-2021')
)

# Number of patients per each timestep.
num_patients = 40

#### Fast Change - COVID Data Simulation

In [None]:
# One list of wall-clock durations (seconds) per method; a fresh list for each key.
comp_time = {method: [] for method in ("Regular", "Static", "SPC", "Bayesian")}

# Paper has AUROC of 0.91, with lower CI at 0.86
recalthreshold = 0.86

# Impacts added to the linear predictor: 0.1 to 1.5 in steps of 0.1, then 2.0, 2.5, 3.0.
custom_impacts = [tenths / 10 for tenths in range(1, 16)] + [halves / 2 for halves in range(4, 7)]

# Day-first date strings marking when the simulated change begins.
switchDateStrings = ['01-04-2020']

# Effect sizes for each predictor ...
hr_age = 0.5
hr_ldh = 9.8
hr_comorbidity = 3.9

# ... and their natural logs, used as coefficients in the simulation and as prior means.
log_age = np.log(hr_age)
log_ldh = np.log(hr_ldh)
log_comorbidity = np.log(hr_comorbidity)

# For every switch date and every impact size: simulate a daily patient cohort whose
# outcomes drift away from the baseline predictions, then time how long each of the
# four approaches takes to run over that data. Each duration (seconds) is appended to
# the matching list in `comp_time`.
for switchDateidx, switchDateString in enumerate(switchDateStrings):
    # Define date range and COVID shock periods. None of these depend on
    # `custom_impact`, so they are computed once per switch date.
    switchDate = pd.to_datetime(switchDateString, dayfirst=True)  # COVID starts spreading
    switchDate2 = pd.to_datetime('01-06-2020', dayfirst=True)  # Peak of the pandemic
    recoveryDate = pd.to_datetime('01-06-2021', dayfirst=True)  # Start of recovery phase
    numdays = (endDate - startDate).days
    switchDays = (switchDate - startDate).days
    switch2Days = (switchDate2 - startDate).days
    recoveryDays = (recoveryDate - startDate).days

    for custom_impact in custom_impacts:
        mydict = {
            'date': [],
            'outcome': [],
            'prediction': [],
            'age': [],
            'sex': [],
            'comorbidity': [],
            'ldh_high': [],
        }

        for i in range(numdays):
            # `timedelta` comes from `from datetime import timedelta`; the previous
            # `dt.timedelta` relied on a `dt` alias that this notebook never imports.
            curday = startDate + timedelta(days=i)

            age = (np.random.normal(44, 16.3, num_patients) - 44) / 16.3  # Mean age 44 years, std 16.3
            sex = np.random.binomial(1, 0.562, num_patients)  # 56.2% are male
            comorbidity = np.random.binomial(1, 0.3, num_patients)  # 30% have comorbidities
            ldh_high = np.random.binomial(1, 0.15, num_patients)  # 15% have LDH >500 U/L
            epsilon = np.random.normal(0, 0.2, num_patients)  # Simulate error term (mean=0, std=0.2)

            # Calculate baseline log-odds.
            # The sex coefficient is set to 1.2 because the paper does not provide one.
            lp = -1.5 + log_age * age + log_ldh * ldh_high + log_comorbidity * comorbidity + 1.2 * (sex - 0.562) + epsilon
            # Predictions are taken from the baseline log-odds, before any shift is
            # applied below, so only the outcomes drift.
            curpredictions = 1 / (1 + np.exp(-lp))  # Convert to probability

            # Simulate COVID effects
            if switchDays <= i < switch2Days:
                lp += custom_impact  # Initial impact of COVID
            elif switch2Days <= i < recoveryDays:
                lp += custom_impact + 0.5  # Peak of the pandemic
            elif i >= recoveryDays:
                lp -= 1.0  # Recovery period - improved health outcomes

            # Generate outcomes
            curoutcomes = np.random.binomial(1, 1 / (1 + np.exp(-lp)))  # Simulate COVID events

            # Append to dictionary
            mydict['date'].extend([curday] * num_patients)
            mydict['outcome'].extend(curoutcomes)
            mydict['prediction'].extend(curpredictions)
            mydict['age'].extend(age)
            mydict['sex'].extend(sex)
            mydict['comorbidity'].extend(comorbidity)
            mydict['ldh_high'].extend(ldh_high)

        df = pd.DataFrame(mydict)

        # Each timed section below covers model construction, trigger setup, hook
        # registration, the run itself and log retrieval. `time.perf_counter()` is
        # used because it is monotonic and intended for measuring durations.

        ############################ Regular testing ############################
        reg_start_time = time.perf_counter()
        model = RecalibratePredictions()
        model.trigger = TimeframeTrigger(model=model, updateTimestep=100, dataStart=df['date'].min(), dataEnd=df['date'].max())
        mytest = PREDICT(data=df, model=model, startDate='min', endDate='max', timestep='month')
        mytest.addLogHook(Accuracy(model))
        mytest.addLogHook(AUROC(model))
        mytest.addLogHook(Precision(model))
        mytest.run()
        log = mytest.getLog()
        reg_end_time = time.perf_counter()
        comp_time["Regular"].append(reg_end_time - reg_start_time)

        ####################################### Static Threshold Testing #######################################
        static_start_time = time.perf_counter()
        model = RecalibratePredictions()
        model.trigger = AUROCThreshold(model=model, update_threshold=recalthreshold)
        mytest = PREDICT(data=df, model=model, startDate='min', endDate='max', timestep='month')
        mytest.addLogHook(Accuracy(model))
        mytest.addLogHook(AUROC(model))
        mytest.addLogHook(Precision(model))
        mytest.run()
        log = mytest.getLog()
        static_end_time = time.perf_counter()
        comp_time["Static"].append(static_end_time - static_start_time)

        ####################################### SPC3 Testing #######################################
        spc_start_time = time.perf_counter()
        model = RecalibratePredictions()
        model.trigger = SPCTrigger(model=model, input_data=df, numMonths=3, verbose=False)
        mytest = PREDICT(data=df, model=model, startDate='min', endDate='max', timestep='month')
        mytest.addLogHook(Accuracy(model))
        mytest.addLogHook(AUROC(model))
        mytest.addLogHook(Precision(model))
        mytest.run()
        log = mytest.getLog()
        spc_end_time = time.perf_counter()
        comp_time["SPC"].append(spc_end_time - spc_start_time)

        ########################################### Bayesian Testing #######################################
        bayes_start_time = time.perf_counter()
        bay_model = BayesianModel(input_data=df, priors={"Intercept": (-1, 2), "age": (log_age, 2), "sex": (1, 2), "comorbidity": (log_comorbidity, 2), "ldh_high": (log_ldh, 2)}, cores=4, verbose=False, draws=1000, tune=250, chains=4)
        bay_model.trigger = BayesianRefitTrigger(model=bay_model, input_data=df, refitFrequency=1)
        mytest = PREDICT(data=df, model=bay_model, startDate='min', endDate='max', timestep='month')
        # The hooks must be bound to `bay_model`; binding them to `model` would score
        # (and time) the leftover SPC recalibration model instead of the Bayesian one.
        mytest.addLogHook(Accuracy(bay_model))
        mytest.addLogHook(AUROC(bay_model))
        mytest.addLogHook(Precision(bay_model))
        mytest.run()
        log = mytest.getLog()
        bayes_end_time = time.perf_counter()
        comp_time["Bayesian"].append(bayes_end_time - bayes_start_time)

        
# Report the mean and sample standard deviation of the recorded durations per method.
print("Average computation times running each method between 01-06-2019 and 31-12-2021:")
for key in comp_time:
    avg_time = statistics.mean(comp_time[key])
    std_time = statistics.stdev(comp_time[key])  # sample standard deviation
    print(f"{key}: {avg_time:.4f} +- {std_time:.4f} seconds")

# Write the raw timings to CSV, one column per method and no index column.
comp_time_df = pd.DataFrame(comp_time)
comp_time_df.to_csv("computation_times.csv", index=False)