To Do:

Model of death

# Descriptive stats for South West hospitals

In [1]:
import pandas as pd


In [2]:
sw = pd.read_csv('stroke_teams.csv', index_col=False)

In [3]:
# List of stroke teams
sw = list(sw.squeeze().values)

In [4]:
sw_dic = {
    'Royal Cornwall Hospital': 'SW1',
    'Royal Devon and Exeter Hospital': 'SW2',
    'North Devon District Hospital': 'SW3',
    'Torbay Hospital': 'SW4',
    'Musgrove Park Hospital': 'SW5',
    'Yeovil District Hospital': 'SW6'
}

# sw = list(sw_dic.keys())

# Read in the data
data_all = pd.read_csv('~/ssnap_data/data.csv')

key_results = pd.DataFrame()

In [5]:
years_covered = sorted(list(set(data_all['year'])))
all_years_str = f'{years_covered[0]} to {years_covered[-1]}'

In [6]:
# # Limit data to years 2021 to 2021
# data_all = data_all[(data_all['year'] >= 2021)
#                     & (data_all['year'] <= 2021)]

# Limit data to out of hopsital onset arriving by ambulance only
data_all = data_all[data_all['onset-to-arrival time'] > 0]
data_all = data_all[data_all['arrive by ambulance'] == True]

# Add new fields
data_all['count'] = 1
data_all['prestroke mrs 0-2'] = data_all['prior disability'] <= 2
data_all['mrs 5-6'] = data_all['discharge disability'] >= 5
data_all['mrs 0-2'] = data_all['discharge disability'] <= 2


Restrict fields

In [7]:
required_fields: list = [
    'count', 'stroke team', 'age', 'male', 'infarction', 'stroke severity',
    'onset-to-arrival time', 'onset known', 'precise onset known',
    'onset during sleep', 'arrive by ambulance', 'year',
    'use of AF anticoagulants', 'prior disability', 'prestroke mrs 0-2',
    'arrival-to-scan time', 'thrombolysis',
    'scan-to-thrombolysis time', 'death', 'discharge disability', 'mrs 5-6', 'mrs 0-2'
]

data_all = data_all[required_fields]
mask = (data_all['onset known'] == True) & (
    data_all['onset-to-arrival time'] <= 240)
data_all['arrive in 4  hours'] = mask

# Add change in disability
data_all['increased disability due to stroke'] = data_all['discharge disability'] - data_all['prior disability']

In [8]:
summary_stats_dict = {
    'count': 'sum',
    'stroke team': 'none',
    'age': 'mean',
    'male': 'mean',
    'infarction': 'mean',
    'stroke severity': 'mean',
    'onset-to-arrival time': 'median',
    'onset known': 'mean',
    'arrive in 4  hours': 'mean',
    'precise onset known': 'mean',
    'onset during sleep': 'mean',
    'year': 'none',
    'use of AF anticoagulants': 'mean',
    'prior disability': 'mean',
    'prestroke mrs 0-2': 'mean',
    'arrival-to-scan time':     'median',
    'thrombolysis':     'mean',
    'scan-to-thrombolysis time': 'median',
    'death': 'mean',
    'discharge disability': 'mean',
    'increased disability due to stroke': 'mean',
    'mrs 5-6': 'mean',
    'mrs 0-2': 'mean'
}

In [9]:
summary_stats = dict()
for year in [all_years_str] + years_covered:
    if year == all_years_str:
        data = data_all
    else:
        data = data_all[(
            (data_all['year'] == year)
            )]
    results = dict()
    for k, v in summary_stats_dict.items():
        if v == 'mean':
            results[k] = data[k].mean()
        elif v == 'median':
            results[k] = data[k].median()
        elif v == 'sum':
            results[k] = data[k].sum()
    results['stroke_team'] = 'all E+W'
    results['year'] = year
    summary_stats[f'all E+W ({year})'] = results


In [10]:
for hospital in sw:
    for year in [all_years_str] + years_covered:
        if year == all_years_str:
            data = data_all[data_all['stroke team'] == hospital]
        else:
            data = data_all[(
            (data_all['stroke team'] == hospital) &
            (data_all['year'] == year)
            )]
        results = dict()
        for k, v in summary_stats_dict.items():
            if v == 'mean':
                results[k] = data[k].mean()
            elif v == 'median':
                results[k] = data[k].median()
            elif v == 'sum':
                results[k] = data[k].sum()
        results['stroke_team'] = hospital
        results['year'] = year
        summary_stats[f'{hospital} ({year})'] = results


In [11]:
summary_stats_df = pd.DataFrame(summary_stats)
# summary_stats_df.rename(sw_dic, axis=1, inplace = True)

# Only keep hospitals with more than 100 admissions:
mask_count = summary_stats_df.loc['count'] > 100
summary_stats_df = summary_stats_df.T[mask_count].T

In [12]:

key_results['onset known'] = summary_stats_df.loc['onset known']
key_results['arrive in 4 hours'] = summary_stats_df.loc['arrive in 4  hours']
key_results['thrombolysis all arrivals'] = summary_stats_df.loc['thrombolysis']


summary_stats_df.round(3)

Unnamed: 0,all E+W (2016 to 2021),all E+W (2016),all E+W (2017),all E+W (2018),all E+W (2019),all E+W (2020),all E+W (2021),Northumbria Specialist Emergency Care Hospital HASU (2016 to 2021),Northumbria Specialist Emergency Care Hospital HASU (2016),Northumbria Specialist Emergency Care Hospital HASU (2017),...,Queen's Medical Centre - Nottingham (2016 to 2021),Queen's Medical Centre - Nottingham (2020),Queen's Medical Centre - Nottingham (2021),Invicta Ward Kent and Canterbury Hospital (2016 to 2021),Invicta Ward Kent and Canterbury Hospital (2020),Invicta Ward Kent and Canterbury Hospital (2021),University Hospitals Dorset Stroke Service (2016 to 2021),University Hospitals Dorset Stroke Service (2021),Grange University Hospital (2016 to 2021),Grange University Hospital (2021)
count,283946,45925,47145,46137,46793,47224,50722,2056,389,309,...,1222,254,956,991,362,629,520,421,234,194
age,75.570672,75.797768,75.855711,75.719217,75.802951,75.159347,75.13367,74.229086,74.993573,74.635922,...,73.854337,74.488189,73.859833,75.355701,76.174033,74.884738,75.711538,75.564133,73.739316,73.737113
male,0.514584,0.505041,0.50508,0.514576,0.513773,0.520011,0.527759,0.537451,0.562982,0.579288,...,0.487725,0.480315,0.485356,0.525732,0.513812,0.532591,0.553846,0.539192,0.525641,0.5
infarction,0.865179,0.864888,0.867176,0.865531,0.861817,0.866487,0.865147,0.85749,0.843188,0.883495,...,0.855974,0.893701,0.848326,0.902119,0.944751,0.877583,0.875,0.862233,0.803419,0.804124
stroke severity,8.086298,8.103756,8.259943,8.247979,8.177334,7.926055,7.827235,7.452821,6.933162,7.142395,...,9.013912,9.283465,8.92364,7.918264,7.607735,8.096979,6.740385,6.707838,7.235043,7.247423
onset-to-arrival time,331.0,308.0,318.0,316.0,319.0,350.0,370.0,270.0,273.0,297.0,...,460.0,214.0,561.0,220.0,285.0,193.0,257.0,255.0,482.5,488.0
onset known,0.705057,0.694567,0.694941,0.706786,0.718975,0.70714,0.707602,0.807393,0.799486,0.786408,...,0.574468,0.708661,0.534519,0.775984,0.748619,0.791733,0.8,0.817102,0.688034,0.695876
arrive in 4 hours,0.436868,0.450822,0.447343,0.446063,0.442075,0.425335,0.41207,0.473249,0.473008,0.466019,...,0.373977,0.519685,0.329498,0.522704,0.461326,0.558029,0.471154,0.475059,0.324786,0.345361
precise onset known,0.361167,0.36614,0.367017,0.369378,0.363281,0.354015,0.348468,0.459144,0.529563,0.427184,...,0.017185,0.015748,0.007322,0.102926,0.038674,0.139905,0.276923,0.251781,0.423077,0.417526
onset during sleep,0.136959,0.133348,0.139951,0.137222,0.138739,0.139357,0.133335,0.192607,0.18509,0.216828,...,0.00491,0.003937,0.004184,0.082745,0.08011,0.084261,0.138462,0.133017,0.183761,0.180412


In [13]:
summary_stats_df.round(3).to_csv('summary_stats.csv')

## Limit analysis to arrival in 4 hours

In [14]:
mask = data_all['arrive in 4  hours'] == True
data_4hr = data_all[mask]

Add onset to thrombolysis



In [15]:
data_4hr['onset to thrombolysis'] = (
    data_4hr['onset-to-arrival time'] + 
    data_4hr['arrival-to-scan time'] + 
    data_4hr['scan-to-thrombolysis time'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_4hr['onset to thrombolysis'] = (


In [16]:
# Repeat analysis for patients arriving in 4 hours
summary_stats_4hr = dict()
for year in [all_years_str] + years_covered:
    if year == all_years_str:
        data = data_4hr
    else:
        data = data_4hr[(
            (data_4hr['year'] == year)
            )]
    results = dict()
    for k, v in summary_stats_dict.items():
        if v == 'mean':
            results[k] = data[k].mean()
        elif v == 'median':
            results[k] = data[k].median()
        elif v == 'sum':
            results[k] = data[k].sum()
    results['stroke_team'] = 'all E+W'
    results['year'] = year
    summary_stats_4hr[f'all E+W ({year})'] = results

for hospital in sw:
    for year in [all_years_str] + years_covered:
        if year == all_years_str:
            data = data_4hr[data_4hr['stroke team'] == hospital]
        else:
            data = data_4hr[(
                (data_4hr['stroke team'] == hospital) &
                (data_4hr['year'] == year)
                )]
        # data = data_4hr[data_4hr['stroke team'] == hospital]
        results = dict()
        for k, v in summary_stats_dict.items():
            if v == 'mean':
                results[k] = data[k].mean()
            elif v == 'median':
                results[k] = data[k].median()
            elif v == 'sum':
                results[k] = data[k].sum()
        results['stroke_team'] = hospital
        results['year'] = year
        summary_stats_4hr[f'{hospital} ({year})'] = results

summary_stats_4hr_df = pd.DataFrame(summary_stats_4hr)

# Only keep hospitals with more than 100 admissions
# in the full data (not the 4hr data) - same mask as earlier.
summary_stats_4hr_df = summary_stats_4hr_df.T[mask_count].T


summary_stats_4hr_df = summary_stats_4hr_df.round(3)
# summary_stats_4hr_df.rename(sw_dic, axis=1, inplace = True)
summary_stats_4hr_df

Unnamed: 0,all E+W (2016 to 2021),all E+W (2016),all E+W (2017),all E+W (2018),all E+W (2019),all E+W (2020),all E+W (2021),Northumbria Specialist Emergency Care Hospital HASU (2016 to 2021),Northumbria Specialist Emergency Care Hospital HASU (2016),Northumbria Specialist Emergency Care Hospital HASU (2017),...,Queen's Medical Centre - Nottingham (2016 to 2021),Queen's Medical Centre - Nottingham (2020),Queen's Medical Centre - Nottingham (2021),Invicta Ward Kent and Canterbury Hospital (2016 to 2021),Invicta Ward Kent and Canterbury Hospital (2020),Invicta Ward Kent and Canterbury Hospital (2021),University Hospitals Dorset Stroke Service (2016 to 2021),University Hospitals Dorset Stroke Service (2021),Grange University Hospital (2016 to 2021),Grange University Hospital (2021)
count,124047,20704,21090,20580,20686,20086,20901,973,184,144,...,457,132,315,518,167,351,245,200,76,67
age,75.124691,75.416103,75.287577,75.289116,75.269506,74.657722,74.8152,73.702467,74.565217,74.409722,...,73.561269,72.954545,74.214286,75.405405,75.494012,75.363248,75.908163,75.95,72.5,72.873134
male,0.521198,0.512268,0.51138,0.520943,0.524799,0.526685,0.531362,0.527235,0.565217,0.548611,...,0.501094,0.5,0.492063,0.53861,0.57485,0.521368,0.522449,0.515,0.578947,0.58209
infarction,0.848622,0.849787,0.852916,0.850049,0.844823,0.84656,0.847471,0.840699,0.798913,0.847222,...,0.873085,0.909091,0.863492,0.901544,0.934132,0.88604,0.84898,0.835,0.776316,0.761194
stroke severity,9.331439,9.396783,9.440066,9.447328,9.420526,9.202031,9.079183,8.526208,8.429348,8.729167,...,10.553611,9.522727,10.968254,8.862934,8.610778,8.982906,8.273469,8.285,9.144737,9.507463
onset-to-arrival time,104.0,97.0,100.0,103.0,104.0,105.0,113.0,110.0,104.5,111.0,...,109.0,115.5,103.0,115.0,115.0,115.0,120.0,120.0,151.0,150.0
onset known,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
arrive in 4 hours,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
precise onset known,0.6296,0.630313,0.640778,0.637658,0.632457,0.621079,0.615042,0.736896,0.766304,0.652778,...,0.043764,0.030303,0.022222,0.162162,0.083832,0.19943,0.497959,0.46,0.763158,0.761194
onset during sleep,0.0467,0.044484,0.04642,0.048105,0.045635,0.050433,0.045261,0.055498,0.070652,0.097222,...,0.004376,0.0,0.006349,0.03861,0.05988,0.02849,0.073469,0.08,0.0,0.0


In [17]:
summary_stats_4hr_df.round(3).to_csv('summary_stats_4hr.csv')

In [18]:
rows = [
    'age', 'infarction', 'precise onset known', 'onset during sleep',
    'use of AF anticoagulants', 'prior disability', 'prestroke mrs 0-2', 
    'stroke severity',
    'onset-to-arrival time', 'arrival-to-scan time', 'thrombolysis',
    'scan-to-thrombolysis time', 'discharge disability', 'death',
    'increased disability due to stroke', 'mrs 5-6', 'mrs 0-2'
]

for row in rows:
    key_results[row] = summary_stats_4hr_df.loc[row]


In [19]:
key_results = key_results.round(3).T

In [20]:
key_results

Unnamed: 0,all E+W (2016 to 2021),all E+W (2016),all E+W (2017),all E+W (2018),all E+W (2019),all E+W (2020),all E+W (2021),Northumbria Specialist Emergency Care Hospital HASU (2016 to 2021),Northumbria Specialist Emergency Care Hospital HASU (2016),Northumbria Specialist Emergency Care Hospital HASU (2017),...,Queen's Medical Centre - Nottingham (2016 to 2021),Queen's Medical Centre - Nottingham (2020),Queen's Medical Centre - Nottingham (2021),Invicta Ward Kent and Canterbury Hospital (2016 to 2021),Invicta Ward Kent and Canterbury Hospital (2020),Invicta Ward Kent and Canterbury Hospital (2021),University Hospitals Dorset Stroke Service (2016 to 2021),University Hospitals Dorset Stroke Service (2021),Grange University Hospital (2016 to 2021),Grange University Hospital (2021)
onset known,0.705057,0.694567,0.694941,0.706786,0.718975,0.70714,0.707602,0.807393,0.799486,0.786408,...,0.574468,0.708661,0.534519,0.775984,0.748619,0.791733,0.8,0.817102,0.688034,0.695876
arrive in 4 hours,0.436868,0.450822,0.447343,0.446063,0.442075,0.425335,0.41207,0.473249,0.473008,0.466019,...,0.373977,0.519685,0.329498,0.522704,0.461326,0.558029,0.471154,0.475059,0.324786,0.345361
thrombolysis all arrivals,0.134022,0.1357,0.136727,0.138024,0.139743,0.128621,0.126099,0.14251,0.14653,0.113269,...,0.157119,0.200787,0.141213,0.173562,0.143646,0.190779,0.132692,0.118765,0.145299,0.159794
age,75.124691,75.416103,75.287577,75.289116,75.269506,74.657722,74.8152,73.702467,74.565217,74.409722,...,73.561269,72.954545,74.214286,75.405405,75.494012,75.363248,75.908163,75.95,72.5,72.873134
infarction,0.848622,0.849787,0.852916,0.850049,0.844823,0.84656,0.847471,0.840699,0.798913,0.847222,...,0.873085,0.909091,0.863492,0.901544,0.934132,0.88604,0.84898,0.835,0.776316,0.761194
precise onset known,0.6296,0.630313,0.640778,0.637658,0.632457,0.621079,0.615042,0.736896,0.766304,0.652778,...,0.043764,0.030303,0.022222,0.162162,0.083832,0.19943,0.497959,0.46,0.763158,0.761194
onset during sleep,0.0467,0.044484,0.04642,0.048105,0.045635,0.050433,0.045261,0.055498,0.070652,0.097222,...,0.004376,0.0,0.006349,0.03861,0.05988,0.02849,0.073469,0.08,0.0,0.0
use of AF anticoagulants,0.140963,0.104859,0.115932,0.147473,0.155999,0.157971,0.164346,0.109969,0.070652,0.090278,...,0.113786,0.106061,0.120635,0.104247,0.143713,0.08547,0.138776,0.12,0.052632,0.059701
prior disability,1.099607,1.092832,1.089853,1.12896,1.123949,1.067808,1.093728,1.121274,1.119565,1.076389,...,0.512035,0.590909,0.495238,0.683398,0.562874,0.740741,1.016327,0.99,0.486842,0.447761
prestroke mrs 0-2,0.788491,0.783568,0.790754,0.781681,0.781736,0.797919,0.795416,0.79445,0.809783,0.833333,...,0.905908,0.893939,0.907937,0.905405,0.922156,0.897436,0.791837,0.8,0.934211,0.940299
