In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv('./output/thrombolysis_choice_benefit_he.csv')
data = data[data['infarction'] ==1]
data['all'] = 1
list(data)

['Unnamed: 0',
 'stroke_team',
 'male',
 'year',
 'arrive_by_ambulance',
 'onset_to_arrival_time',
 'onset_during_sleep',
 'arrival_to_scan_time',
 'onset_to_scan',
 'infarction',
 'stroke_severity',
 'precise_onset_known',
 'prior_disability',
 'any_afib_diagnosis',
 'afib_anticoagulant',
 'age',
 'onset_to_thrombolysis',
 'thrombolysis',
 'thrombectomy',
 'scan_to_thrombolysis_time',
 'discharge_disability',
 'simulated_onset_to_thrombolysis',
 'thrombolysis_choice_probability',
 'thrombolysis_choice',
 'hospital_SHAP',
 'benchmark_decision',
 'untreated_weighted_mrs',
 'untreated_0_to_4',
 'untreated_mrs_0',
 'untreated_mrs_1',
 'untreated_mrs_2',
 'untreated_mrs_3',
 'untreated_mrs_4',
 'untreated_mrs_5',
 'untreated_mrs_6',
 'untreated_utility',
 'treated_weighted_mrs',
 'treated_0_to_4',
 'treated_mrs_0',
 'treated_mrs_1',
 'treated_mrs_2',
 'treated_mrs_3',
 'treated_mrs_4',
 'treated_mrs_5',
 'treated_mrs_6',
 'treated_utility',
 'change_in_weighted_mrs',
 'change_in_mrs_0_to_4

In [3]:
# Cherck afib
mask = data['afib_anticoagulant'] == 0

proportion = 1 - mask.mean()
print (f'Proportion of patients receiving anticoagulants: {proportion:.2f}')

Proportion of patients receiving anticoagulants: 0.13


In [4]:
# Set dataframe dispplay rows to 100
pd.set_option('display.max_rows', 100)

data.head().T

Unnamed: 0,0,1,2,3,4
Unnamed: 0,0,1,2,3,5
stroke_team,Royal Berkshire Hospital,Queen Elizabeth the Queen Mother Hospital,Royal Berkshire Hospital,Royal Berkshire Hospital,Queen Elizabeth Hospital Edgbaston
male,0,1,1,1,0
year,2017,2017,2017,2021,2016
arrive_by_ambulance,1.0,1.0,1.0,1.0,1.0
onset_to_arrival_time,65.0,110.0,190.0,90.0,111.0
onset_during_sleep,0,0,0,0,0
arrival_to_scan_time,5.0,75.0,15.0,16.0,39.0
onset_to_scan,70.0,185.0,205.0,106.0,150.0
infarction,1.0,1.0,1.0,1.0,1.0


In [5]:
# Set HE columns when dead

mask = data['used_untreated_mrs'] == 6
data.loc[mask, 'untreated_life_expectancy'] = data.loc[mask, 'age']
data.loc[mask, 'untreated_qalys_total'] = 0
data.loc[mask, 'untreated_care_years'] = 0
data.loc[mask, 'untreated_survival_median_years'] = 0
data.loc[mask, 'untreated_total_discounted_cost'] = 0

mask = data['used_treated_mrs'] == 6
data.loc[mask, 'treated_life_expectancy'] = data.loc[mask, 'age']
data.loc[mask, 'treated_qalys_total'] = 0
data.loc[mask, 'treated_care_years'] = 0
data.loc[mask, 'treated_survival_median_years'] = 0
data.loc[mask, 'treated_total_discounted_cost'] = 0


In [6]:
results = pd.DataFrame()

# Get descriptive statistics for untreated patients
untreated_cols = [
    'untreated_mrs_6',
    'untreated_survival_median_years',
    'untreated_life_expectancy',
    'untreated_care_years',
    'untreated_qalys_total',
    'untreated_life_expectancy',
    'untreated_total_discounted_cost',
    'untreated_mrs_0-2',
    'untreated_mrs_5-6']

treated_cols = [
    'treated_mrs_6',
    'treated_survival_median_years',
    'treated_life_expectancy',
    'treated_care_years',
    'treated_qalys_total',
    'treated_life_expectancy',
    'treated_total_discounted_cost',
    'treated_mrs_0-2',
    'treated_mrs_5-6']

scenarios = ['all', 'thrombolysis', 'benchmark_decision', 'improved_outcome']

counts = dict()

for scenario in scenarios:
    mask = data[scenario] == 1
    counts[scenario] = mask.sum()
    selection_untreated = data.loc[mask][untreated_cols].describe().T
    results[f'{scenario}_untreated'] = selection_untreated['mean']
    selection_treated = data.loc[mask][treated_cols].describe().T
    results[f'{scenario}_treated'] = selection_treated['mean'].values
    results[f'{scenario}_diff'] = results[f'{scenario}_treated'] - results[f'{scenario}_untreated']

results = results.T

col_names = [
    'death_rate',
    'survival_median_years',
    'life_expectancy',
    'care_years',
    'qalys_total',
    'life_expectancy',
    'total_discounted_cost',
    'mrs_0-2',
    'mrs_5-6',]

# Create a rename dictionary
rename_dict = dict(zip(results.columns, col_names))
results = results.rename(columns=rename_dict)



results.to_csv('./output/he_summary.csv')
results.round(3)

Unnamed: 0,death_rate,survival_median_years,life_expectancy,care_years,qalys_total,life_expectancy.1,total_discounted_cost,mrs_0-2,mrs_5-6
all_untreated,0.142,7.598,82.054,0.28,5.013,82.054,20291.998,0.512,0.208
all_treated,0.136,7.9,82.342,0.251,5.249,82.342,19643.554,0.54,0.188
all_diff,-0.006,0.303,0.288,-0.028,0.236,0.288,-648.443,0.028,-0.02
thrombolysis_untreated,0.171,7.603,81.985,0.28,5.02,81.985,20370.167,0.471,0.239
thrombolysis_treated,0.142,7.908,82.269,0.254,5.258,82.269,19805.651,0.539,0.193
thrombolysis_diff,-0.029,0.305,0.283,-0.026,0.238,0.283,-564.515,0.068,-0.047
benchmark_decision_untreated,0.174,7.627,82.01,0.28,5.036,82.01,20387.557,0.465,0.241
benchmark_decision_treated,0.144,7.925,82.298,0.254,5.269,82.298,19784.429,0.534,0.194
benchmark_decision_diff,-0.03,0.298,0.289,-0.027,0.234,0.289,-603.128,0.069,-0.048
improved_outcome_untreated,0.219,7.462,82.138,0.274,4.925,82.138,19892.376,0.378,0.313


In [7]:
counts

{'all': 113096,
 'thrombolysis': 39756,
 'benchmark_decision': 48863,
 'improved_outcome': 56105}