In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import matplotlib.ticker as mtick
from matplotlib.dates import DateFormatter
from datetime import timedelta
from datetime import datetime
from tqdm import tqdm
import gc

##### Load results table

In [None]:
# Load the three results tables used by the plotting cells below.
# NOTE: the CSV paths are empty strings in this copy of the notebook; they must be
# pointed at the real result files before the notebook can run.
# tr_data: per-outcome metrics per timepoint (consumed by plot_per_trajectories /
# plot_risk_trajectories).
tr_data = pd.read_csv('')
# reg_hc_data: metrics consumed by plot_hc_trajectories.
reg_hc_data = pd.read_csv('')
# strat_hc_data: metrics broken down by 'Group' (consumed by the age and SIMD plots).
strat_hc_data = pd.read_csv('')
# Show which timepoint labels are present before they are relabelled further down.
print(tr_data.timepoint.unique())

In [None]:
# Global matplotlib text defaults for every figure drawn after this cell:
# 12 pt, normal weight, serif font family.
plt.rcParams.update({'font.size':12, 'font.weight':'normal', 'font.family':'serif'})

##### Model change in performance across outcomes

In [None]:
# Harmonise display labels: shorten the two earliest timepoint names and give the
# 'Admission to MoE' outcome the name used in the figures.
tr_data['timepoint'] = tr_data['timepoint'].replace({
    'Hospital admission': 'Admission',
    'ED attendance': 'ED arrival',
})
tr_data['target'] = tr_data['target'].replace({
    'Admission to MoE': 'Geriatric Medicine services',
})
# Abbreviations for the post-admission timepoints are currently disabled
# (kept here for reference):
#   '24hrs post-admission' -> '24h PA'
#   '48hrs post-admission' -> '48h PA'
#   '72hrs post-admission' -> '72h PA'

##### ROC-AUC, PR-AUC, outcome prevalence and top-decile response rate

In [None]:
def plot_per_trajectories(tr_data, colors={'In-hospital death': '#d7191c',
                                           'Extended stay (>=14 days)': '#fdae61',
                                           'ICU/HDU admission': '#756bb1',
                                           'Home discharge': '#2c7bb6',
                                           'Geriatric Medicine services': '#ed849e',
                                           'Received rehabilitation': '#810f7c'}):
    """Plot per-outcome performance trajectories on a 2x2 grid of panels.

    One line is drawn per distinct value of ``tr_data.target`` with ``timepoint``
    on the x-axis. Panels, left to right and top to bottom:

    * ``ROC-AUC`` with a shaded band between ``ROC-lower`` and ``ROC-upper``
    * ``PR-AUC`` with a shaded band between ``PR-lower`` and ``PR-upper``
    * ``outcome_prev`` (dashed line, triangle markers)
    * ``10th_decile_response``

    Parameters
    ----------
    tr_data : pandas.DataFrame
        Must contain the columns ``target``, ``timepoint``, ``ROC-AUC``,
        ``ROC-lower``, ``ROC-upper``, ``PR-AUC``, ``PR-lower``, ``PR-upper``,
        ``outcome_prev`` and ``10th_decile_response``.
    colors : dict
        Maps each target label to a matplotlib colour. Every label found in
        ``tr_data.target`` needs an entry, otherwise a ``KeyError`` is raised.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ((ax_roc, ax_pr), (ax_prev, ax_resp)) = plt.subplots(nrows=2, ncols=2, figsize=(11, 9))
    all_axes = (ax_roc, ax_pr, ax_prev, ax_resp)

    for outcome in tr_data.target.unique():
        rows = tr_data[tr_data.target == outcome]

        # Discrimination metrics, each with its interval drawn as a translucent band.
        ax_roc.plot(rows.timepoint, rows['ROC-AUC'], marker='o', label=outcome, color=colors[outcome])
        ax_roc.fill_between(rows.timepoint, rows['ROC-lower'], rows['ROC-upper'],
                            alpha=0.2, color=colors[outcome])
        ax_pr.plot(rows.timepoint, rows['PR-AUC'], marker='o', label=outcome, color=colors[outcome])
        ax_pr.fill_between(rows.timepoint, rows['PR-lower'], rows['PR-upper'],
                           alpha=0.2, color=colors[outcome])

        # Prevalence is drawn dashed with triangles and carries no legend label.
        ax_prev.plot(rows.timepoint, rows['outcome_prev'], color=colors[outcome],
                     marker='^', linestyle='--', alpha=0.8)
        ax_resp.plot(rows.timepoint, rows['10th_decile_response'], marker='o',
                     label=outcome, color=colors[outcome])

    # Axis titles, one per panel.
    panel_labels = ('ROC-AUC', 'PR-AUC', 'Outcome prevalence', 'Response rate at top risk decile')
    for panel, text in zip(all_axes, panel_labels):
        panel.set_ylabel(text)

    fig.suptitle('Performance trajectory per secondary outcome with 95% CI.')

    # Only the ROC panel carries the legend.
    ax_roc.legend(loc='lower right', framealpha=0.5)

    for panel in all_axes:
        panel.grid(True, linestyle='--', alpha=0.7)

    # The two bottom panels show their tick values with a percent sign.
    for panel in (ax_prev, ax_resp):
        panel.yaxis.set_major_formatter('{x:1.0f}%')

    # Fixed y-ranges; the prevalence panel keeps matplotlib's automatic limits.
    ax_roc.set_ylim(0.5, 1.0)
    ax_pr.set_ylim(0.0, 1.0)
    ax_resp.set_ylim(0.0,100)

    fig.tight_layout()
    plt.show()

In [None]:
# Draw the four-panel performance figure for the relabelled outcome results table.
plot_per_trajectories(tr_data)

In [None]:
def plot_risk_trajectories(tr_data, colors={'In-hospital death': '#d7191c',
                                            'Extended stay (>=14 days)': '#fdae61',
                                            'ICU/HDU admission': '#756bb1',
                                            'Home discharge': '#2c7bb6',
                                            'Geriatric Medicine services': '#ed849e',
                                            'Admission to MoE': '#ed849e',
                                            'Received rehabilitation': '#810f7c'}):
    """Plot the response rate in the highest risk decile per outcome across timepoints.

    One line is drawn per distinct value of ``tr_data.target``, with ``timepoint``
    on the x-axis and ``10th_decile_response`` on a 0-100 y-axis whose ticks carry
    a percent sign. No legend is drawn.

    Parameters
    ----------
    tr_data : pandas.DataFrame
        Must contain the columns ``target``, ``timepoint`` and
        ``10th_decile_response``.
    colors : dict
        Maps each target label to a matplotlib colour. The default palette accepts
        both the original label ``'Admission to MoE'`` and the label it is renamed
        to earlier in the notebook, ``'Geriatric Medicine services'``, so the
        function works on the table before or after relabelling.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.

    Raises
    ------
    KeyError
        If a label in ``tr_data.target`` has no entry in ``colors``.
    """
    targets = tr_data.target.unique()

    # Fail before any figure is created, naming every label that lacks a colour.
    missing = [label for label in targets if label not in colors]
    if missing:
        raise KeyError(f"No colour defined in `colors` for target(s): {missing}")

    fig, ax = plt.subplots(figsize=(6, 5))
    for target in targets:
        tg_data = tr_data[tr_data.target == target]
        ax.plot(tg_data.timepoint, tg_data['10th_decile_response'],
                marker='o', label=target, color=colors[target])

    ax.set_ylabel('Response rate at highest risk decile')
    ax.set_title('Patient response rate trajectory per secondary outcome')
    ax.grid(True, linestyle='--', alpha=0.7)
    ax.yaxis.set_major_formatter('{x:1.0f}%')
    ax.set_ylim(0, 100)
    fig.tight_layout()
    plt.show()

In [None]:
# Draw the single-panel top-risk-decile response-rate figure from the same table.
plot_risk_trajectories(tr_data)

#### Care intensity trajectories

In [None]:
# Shorten the two earliest timepoint labels so they match the other figures.
reg_hc_data['timepoint'] = reg_hc_data['timepoint'].replace({
    'Hospital admission': 'Admission',
    'ED attendance': 'ED arrival',
})

In [None]:
# Display the full regression results table to check the relabelled timepoints.
reg_hc_data

In [None]:
def plot_hc_trajectories(tr_data, colors={'Total health contacts': '#d7191c'}):
    """Plot health-contacts regression metrics across timepoints on a 2x2 grid.

    One line plus a shaded band is drawn per distinct value of ``tr_data.target``
    in each panel. Panels, left to right and top to bottom: ``MAE``, ``MAPE``,
    ``Balanced Accuracy`` and ``Cohen's Kappa Score``; each band spans the
    matching upper/lower columns.

    Parameters
    ----------
    tr_data : pandas.DataFrame
        Must contain ``target``, ``timepoint``, ``MAE``, ``MAE-upper``,
        ``MAE-lower``, ``MAPE``, ``MAPE-upper``, ``MAPE-lower``,
        ``Balanced Accuracy``, ``Bacc_upper``, ``Bacc_lower``,
        ``Cohen's Kappa Score``, ``CKS_upper`` and ``CKS_lower``.
    colors : dict
        Maps each target label to a matplotlib colour; a label without an entry
        raises ``KeyError``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ((ax_mae, ax_mape), (ax_bacc, ax_kappa)) = plt.subplots(nrows=2, ncols=2, figsize=(7,7))
    all_axes = [ax_mae, ax_mape, ax_bacc, ax_kappa]

    # (axis, metric column, upper-bound column, lower-bound column) for each panel.
    panel_specs = (
        (ax_mae, 'MAE', 'MAE-upper', 'MAE-lower'),
        (ax_mape, 'MAPE', 'MAPE-upper', 'MAPE-lower'),
        (ax_bacc, 'Balanced Accuracy', 'Bacc_upper', 'Bacc_lower'),
        (ax_kappa, 'Cohen\'s Kappa Score', 'CKS_upper', 'CKS_lower'),
    )

    for series_label in tr_data.target.unique():
        rows = tr_data[tr_data.target == series_label]
        for panel, metric_col, upper_col, lower_col in panel_specs:
            panel.plot(rows.timepoint, rows[metric_col], marker='o',
                       label=series_label, color=colors[series_label])
            panel.fill_between(rows.timepoint, rows[upper_col], rows[lower_col],
                               alpha=0.2, color=colors[series_label])

    ax_mae.set_ylabel('MAE')
    ax_mape.set_ylabel('cMAPE (%)')
    ax_bacc.set_ylabel('Balanced Accuracy')
    ax_kappa.set_ylabel('Cohen\'s Kappa Score')
    fig.suptitle('Performance trajectory in health contacts regression with 95% CI.')

    for panel in all_axes:
        panel.grid(True, linestyle='--', alpha=0.7)

    # Hand-picked y-ranges for this figure.
    ax_mae.set_ylim([0.75, 1.00])
    ax_mape.set_ylim([30, 50])
    ax_bacc.set_ylim([0.25, 0.35])
    ax_kappa.set_ylim([0.35, 0.5])
    ax_mape.yaxis.set_major_formatter("{x:1.0f}%")

    # Tilt the timepoint labels; plt.xticks acts on the current axes, so each
    # panel is made current in turn.
    for panel in all_axes:
        plt.sca(panel)
        plt.xticks(rotation=30)

    fig.tight_layout()
    plt.show()

In [None]:
# Draw the four-panel regression-metric figure for the overall results table.
plot_hc_trajectories(reg_hc_data)

In [None]:
# Shorten the two earliest timepoint labels so they match the other figures.
strat_hc_data['timepoint'] = strat_hc_data['timepoint'].replace({
    'Hospital admission': 'Admission',
    'ED attendance': 'ED arrival',
})

In [None]:
# Preview the first rows of the stratified results table after relabelling.
strat_hc_data.head()

In [None]:
def plot_age_hc_trajectories(tr_data, colors={'50-59': '#fed98e', 
                                              '60-69': '#fdd49e', 
                                              '70-79': '#fdbb84', 
                                              '80-89': '#fc8d59', 
                                              '90+': '#e34a33'}):
    """Plot health-contacts regression metrics per age band on a 2x2 grid.

    Rows whose ``Group`` is not one of ``'50-59'``, ``'60-69'``, ``'70-79'``,
    ``'80-89'`` or ``'90+'`` are discarded. For each remaining group a line and a
    shaded band are drawn in every panel. Panels, left to right and top to bottom:
    ``MAE``, ``MAPE``, ``Balanced Accuracy`` and ``Cohen's Kappa Score``. The
    legend is placed in the MAE panel.

    Parameters
    ----------
    tr_data : pandas.DataFrame
        Must contain ``Group``, ``timepoint``, ``MAE``, ``MAE-upper``,
        ``MAE-lower``, ``MAPE``, ``MAPE-upper``, ``MAPE-lower``,
        ``Balanced Accuracy``, ``Bacc_upper``, ``Bacc_lower``,
        ``Cohen's Kappa Score``, ``CKS_upper`` and ``CKS_lower``.
    colors : dict
        Maps each age-band label to a matplotlib colour; a plotted group without
        an entry raises ``KeyError``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ((ax_mae, ax_mape), (ax_bacc, ax_kappa)) = plt.subplots(nrows=2, ncols=2, figsize=(7,7))
    all_axes = [ax_mae, ax_mape, ax_bacc, ax_kappa]

    # Keep only the age-band rows of the stratified table.
    age_rows = tr_data[tr_data['Group'].isin(['50-59', '60-69', '70-79', '80-89', '90+'])]

    # (axis, metric column, upper-bound column, lower-bound column) for each panel.
    panel_specs = (
        (ax_mae, 'MAE', 'MAE-upper', 'MAE-lower'),
        (ax_mape, 'MAPE', 'MAPE-upper', 'MAPE-lower'),
        (ax_bacc, 'Balanced Accuracy', 'Bacc_upper', 'Bacc_lower'),
        (ax_kappa, 'Cohen\'s Kappa Score', 'CKS_upper', 'CKS_lower'),
    )

    for band in age_rows['Group'].unique():
        rows = age_rows[age_rows.Group == band]
        for panel, metric_col, upper_col, lower_col in panel_specs:
            panel.plot(rows.timepoint, rows[metric_col], marker='o',
                       label=band, color=colors[band])
            panel.fill_between(rows.timepoint, rows[upper_col], rows[lower_col],
                               alpha=0.2, color=colors[band])

    ax_mae.set_ylabel('MAE')
    ax_mape.set_ylabel('cMAPE (%)')
    ax_bacc.set_ylabel('Balanced Accuracy')
    ax_kappa.set_ylabel('Cohen\'s Kappa Score')
    fig.suptitle('Performance trajectory by age in health contacts regression with 95% CI.')
    ax_mae.legend(loc='upper right', framealpha=0.5)

    for panel in all_axes:
        panel.grid(True, linestyle='--', alpha=0.7)

    # Hand-picked y-ranges for this figure.
    ax_mae.set_ylim([0.7, 1.20])
    ax_mape.set_ylim([25, 60])
    ax_bacc.set_ylim([0.20, 0.35])
    ax_kappa.set_ylim([0.1, 0.5])
    ax_mape.yaxis.set_major_formatter("{x:1.0f}%")

    # Tilt the timepoint labels; plt.xticks acts on the current axes, so each
    # panel is made current in turn.
    for panel in all_axes:
        plt.sca(panel)
        plt.xticks(rotation=30)

    fig.tight_layout()
    plt.show()

In [None]:
# Draw the regression-metric figure for the age-band rows of the stratified table.
plot_age_hc_trajectories(strat_hc_data)

In [None]:
def plot_simd_hc_trajectories(tr_data, colors={'1 - most deprived': '#74a9cf', 
                                              '2-4': '#2b8cbe', 
                                              '5 - least deprived': '#045a8d'}):
    """Plot health-contacts regression metrics per SIMD group on a 2x2 grid.

    Rows whose ``Group`` is not one of ``'1 - most deprived'``, ``'2-4'`` or
    ``'5 - least deprived'`` are discarded. For each remaining group a line and a
    shaded band are drawn in every panel. Panels, left to right and top to bottom:
    ``MAE``, ``MAPE``, ``Balanced Accuracy`` and ``Cohen's Kappa Score``. The
    legend is placed in the Cohen's Kappa panel.

    Parameters
    ----------
    tr_data : pandas.DataFrame
        Must contain ``Group``, ``timepoint``, ``MAE``, ``MAE-upper``,
        ``MAE-lower``, ``MAPE``, ``MAPE-upper``, ``MAPE-lower``,
        ``Balanced Accuracy``, ``Bacc_upper``, ``Bacc_lower``,
        ``Cohen's Kappa Score``, ``CKS_upper`` and ``CKS_lower``.
    colors : dict
        Maps each SIMD group label to a matplotlib colour; a plotted group without
        an entry raises ``KeyError``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ((ax_mae, ax_mape), (ax_bacc, ax_kappa)) = plt.subplots(nrows=2, ncols=2, figsize=(7,7))
    all_axes = [ax_mae, ax_mape, ax_bacc, ax_kappa]

    # Keep only the SIMD rows of the stratified table.
    simd_rows = tr_data[tr_data['Group'].isin(['1 - most deprived', '2-4', '5 - least deprived'])]

    # (axis, metric column, upper-bound column, lower-bound column) for each panel.
    panel_specs = (
        (ax_mae, 'MAE', 'MAE-upper', 'MAE-lower'),
        (ax_mape, 'MAPE', 'MAPE-upper', 'MAPE-lower'),
        (ax_bacc, 'Balanced Accuracy', 'Bacc_upper', 'Bacc_lower'),
        (ax_kappa, 'Cohen\'s Kappa Score', 'CKS_upper', 'CKS_lower'),
    )

    for simd_group in simd_rows['Group'].unique():
        rows = simd_rows[simd_rows.Group == simd_group]
        for panel, metric_col, upper_col, lower_col in panel_specs:
            panel.plot(rows.timepoint, rows[metric_col], marker='o',
                       label=simd_group, color=colors[simd_group])
            panel.fill_between(rows.timepoint, rows[upper_col], rows[lower_col],
                               alpha=0.2, color=colors[simd_group])

    ax_mae.set_ylabel('MAE')
    ax_mape.set_ylabel('cMAPE (%)')
    ax_bacc.set_ylabel('Balanced Accuracy')
    ax_kappa.set_ylabel('Cohen\'s Kappa Score')
    fig.suptitle('Performance trajectory by SIMD in health contacts regression with 95% CI.')
    ax_kappa.legend(loc='lower right', framealpha=0.5)

    for panel in all_axes:
        panel.grid(True, linestyle='--', alpha=0.7)

    # Hand-picked y-ranges for this figure.
    ax_mae.set_ylim([0.75, 1.05])
    ax_mape.set_ylim([25, 55])
    ax_bacc.set_ylim([0.25, 0.35])
    ax_kappa.set_ylim([0.25, 0.55])
    ax_mape.yaxis.set_major_formatter("{x:1.0f}%")

    # Tilt the timepoint labels; plt.xticks acts on the current axes, so each
    # panel is made current in turn.
    for panel in all_axes:
        plt.sca(panel)
        plt.xticks(rotation=30)

    fig.tight_layout()
    plt.show()

In [None]:
# Draw the regression-metric figure for the SIMD rows of the stratified table.
plot_simd_hc_trajectories(strat_hc_data)