To Do:

Model of death

# Descriptive stats for South West hospitals

In [1]:
import pandas as pd


In [2]:
sw_dic = {
    'Royal Cornwall Hospital': 'SW1',
    'Royal Devon and Exeter Hospital': 'SW2',
    'North Devon District Hospital': 'SW3',
    'Torbay Hospital': 'SW4',
    'Musgrove Park Hospital': 'SW5',
    'Yeovil District Hospital': 'SW6'
}

sw = list(sw_dic.keys())

# Read in the data
data_all = pd.read_csv('../data/data.csv')

key_results = pd.DataFrame()

In [3]:
# Limit data to years 2021 to 2021
data_all = data_all[(data_all['year'] >= 2021)
                    & (data_all['year'] <= 2021)]

# Limit data to out of hopsital onset arriving by ambulance only
data_all = data_all[data_all['onset-to-arrival time'] > 0]
data_all = data_all[data_all['arrive by ambulance'] == True]

# Add new fields
data_all['count'] = 1
data_all['prestroke mrs 0-2'] = data_all['prior disability'] <= 2
data_all['mrs 5-6'] = data_all['discharge disability'] >= 5
data_all['mrs 0-2'] = data_all['discharge disability'] <= 2


Restrict fields

In [4]:
required_fields: list = [
    'count', 'stroke team', 'age', 'male', 'infarction', 'stroke severity',
    'onset-to-arrival time', 'onset known', 'precise onset known',
    'onset during sleep', 'arrive by ambulance', 'year',
    'use of AF anticoagulants', 'prior disability', 'prestroke mrs 0-2',
    'arrival-to-scan time', 'thrombolysis',
    'scan-to-thrombolysis time', 'death', 'discharge disability', 'mrs 5-6', 'mrs 0-2'
]

data_all = data_all[required_fields]
mask = (data_all['onset known'] == True) & (
    data_all['onset-to-arrival time'] <= 240)
data_all['arrive in 4  hours'] = mask

# Add change in disability
data_all['increased disability due to stroke'] = data_all['discharge disability'] - data_all['prior disability']



In [5]:
summary_stats_dict = {
    'count': 'sum',
    'stroke team': 'none',
    'age': 'mean',
    'male': 'mean',
    'infarction': 'mean',
    'stroke severity': 'mean',
    'onset-to-arrival time': 'median',
    'onset known': 'mean',
    'arrive in 4  hours': 'mean',
    'precise onset known': 'mean',
    'onset during sleep': 'mean',
    'year': 'none',
    'use of AF anticoagulants': 'mean',
    'prior disability': 'mean',
    'prestroke mrs 0-2': 'mean',
    'arrival-to-scan time':     'median',
    'thrombolysis':     'mean',
    'scan-to-thrombolysis time': 'median',
    'death': 'mean',
    'discharge disability': 'mean',
    'increased disability due to stroke': 'mean',
    'mrs 5-6': 'mean',
    'mrs 0-2': 'mean'
}

In [6]:
summary_stats = dict()
results = dict()
for k, v in summary_stats_dict.items():
    if v == 'mean':
        results[k] = data_all[k].mean()
    elif v == 'median':
        results[k] = data_all[k].median()
    elif v == 'sum':
        results[k] = data_all[k].sum()
summary_stats['all E+W'] = results


In [7]:
for hospital in sw:
    data = data_all[data_all['stroke team'] == hospital]
    results = dict()
    for k, v in summary_stats_dict.items():
        if v == 'mean':
            results[k] = data[k].mean()
        elif v == 'median':
            results[k] = data[k].median()
        elif v == 'sum':
            results[k] = data[k].sum()
    summary_stats[hospital] = results


In [8]:
summary_stats_df = pd.DataFrame(summary_stats)
summary_stats_df.rename(sw_dic, axis=1, inplace = True)

key_results['onset known'] = summary_stats_df.loc['onset known']
key_results['arrive in 4 hours'] = summary_stats_df.loc['arrive in 4  hours']
key_results['thrombolysis all arrivals'] = summary_stats_df.loc['thrombolysis']

summary_stats_df.round(3)

Unnamed: 0,all E+W,SW1,SW2,SW3,SW4,SW5,SW6
count,50722.0,473.0,465.0,322.0,307.0,332.0,224.0
age,75.134,75.936,78.747,76.382,77.581,75.858,78.281
male,0.528,0.516,0.497,0.503,0.528,0.533,0.5
infarction,0.865,0.837,0.886,0.898,0.857,0.852,0.893
stroke severity,7.827,7.636,7.29,8.093,7.971,7.37,6.036
onset-to-arrival time,370.0,541.0,302.0,353.0,300.0,286.5,182.5
onset known,0.708,0.558,0.865,0.64,1.0,0.702,0.862
arrive in 4 hours,0.412,0.345,0.437,0.419,0.446,0.458,0.58
precise onset known,0.348,0.336,0.308,0.528,0.44,0.452,0.536
onset during sleep,0.133,0.095,0.204,0.106,0.156,0.163,0.094


## Limit analysis to arrival in 4 hours

In [9]:
mask = data_all['arrive in 4  hours'] == True
data_4hr = data_all[mask]

Add onset to thrombolysis



In [14]:
data_4hr['onset to thrombolysis'] = (
    data_4hr['onset-to-arrival time'] + 
    data_4hr['arrival-to-scan time'] + 
    data_4hr['scan-to-thrombolysis time'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_4hr['onset to thrombolysis'] = (


In [10]:
# Repeat analysis for patients arriving in 4 hours
summary_stats_4hr = dict()
results = dict()
for k, v in summary_stats_dict.items():
    if v == 'mean':
        results[k] = data_4hr[k].mean()
    elif v == 'median':
        results[k] = data_4hr[k].median()
    elif v == 'sum':
        results[k] = data_all[k].sum()
summary_stats_4hr['all E+W'] = results

for hospital in sw:
    data = data_4hr[data_4hr['stroke team'] == hospital]
    results = dict()
    for k, v in summary_stats_dict.items():
        if v == 'mean':
            results[k] = data[k].mean()
        elif v == 'median':
            results[k] = data[k].median()
        elif v == 'sum':
            results[k] = data[k].sum()
    summary_stats_4hr[hospital] = results

summary_stats_4hr_df = pd.DataFrame(summary_stats_4hr)
summary_stats_4hr_df = summary_stats_4hr_df.round(3)
summary_stats_4hr_df.rename(sw_dic, axis=1, inplace = True)
summary_stats_4hr_df

Unnamed: 0,all E+W,SW1,SW2,SW3,SW4,SW5,SW6
count,50722.0,163.0,203.0,135.0,137.0,152.0,130.0
age,74.815,74.218,79.101,75.167,78.741,74.967,77.692
male,0.531,0.534,0.522,0.496,0.555,0.592,0.531
infarction,0.847,0.816,0.867,0.867,0.818,0.842,0.862
stroke severity,9.079,7.337,7.97,9.111,9.577,7.507,6.923
onset-to-arrival time,113.0,120.0,123.0,121.0,102.0,117.0,112.0
onset known,1.0,1.0,1.0,1.0,1.0,1.0,1.0
arrive in 4 hours,1.0,1.0,1.0,1.0,1.0,1.0,1.0
precise onset known,0.615,0.687,0.547,0.867,0.672,0.757,0.777
onset during sleep,0.045,0.018,0.094,0.007,0.029,0.0,0.085


In [11]:
rows = [
    'age', 'infarction', 'precise onset known', 'onset during sleep',
    'use of AF anticoagulants', 'prior disability', 'prestroke mrs 0-2', 
    'stroke severity',
    'onset-to-arrival time', 'arrival-to-scan time', 'thrombolysis',
    'scan-to-thrombolysis time', 'discharge disability', 'death',
    'increased disability due to stroke', 'mrs 5-6', 'mrs 0-2'
]

for row in rows:
    key_results[row] = summary_stats_4hr_df.loc[row]


In [12]:
key_results = key_results.round(3).T

In [13]:
key_results

Unnamed: 0,all E+W,SW1,SW2,SW3,SW4,SW5,SW6
onset known,0.708,0.558,0.865,0.64,1.0,0.702,0.862
arrive in 4 hours,0.412,0.345,0.437,0.419,0.446,0.458,0.58
thrombolysis all arrivals,0.126,0.085,0.112,0.127,0.088,0.09,0.129
age,74.815,74.218,79.101,75.167,78.741,74.967,77.692
infarction,0.847,0.816,0.867,0.867,0.818,0.842,0.862
precise onset known,0.615,0.687,0.547,0.867,0.672,0.757,0.777
onset during sleep,0.045,0.018,0.094,0.007,0.029,0.0,0.085
use of AF anticoagulants,0.164,0.172,0.158,0.17,0.146,0.164,0.208
prior disability,1.094,1.252,1.399,1.259,1.19,1.033,0.969
prestroke mrs 0-2,0.795,0.791,0.739,0.785,0.766,0.803,0.831
