To Do:

Model of death

# Descriptive stats for South West hospitals

In [1]:
import pandas as pd


In [2]:
sw = pd.read_csv('stroke_teams.csv', index_col=False)

In [3]:
# List of stroke teams
sw = list(sw.squeeze().values)

In [4]:
sw_dic = {
    'Royal Cornwall Hospital': 'SW1',
    'Royal Devon and Exeter Hospital': 'SW2',
    'North Devon District Hospital': 'SW3',
    'Torbay Hospital': 'SW4',
    'Musgrove Park Hospital': 'SW5',
    'Yeovil District Hospital': 'SW6'
}

# sw = list(sw_dic.keys())

# Read in the data
data_all = pd.read_csv('~/ssnap_data/data.csv')

key_results = pd.DataFrame()

In [5]:
years_covered = sorted(list(set(data_all['year'])))
all_years_str = f'{years_covered[0]} to {years_covered[-1]}'

In [6]:
# # Limit data to years 2021 to 2021
# data_all = data_all[(data_all['year'] >= 2021)
#                     & (data_all['year'] <= 2021)]

# Limit data to out of hopsital onset arriving by ambulance only
data_all = data_all[data_all['onset-to-arrival time'] > 0]
data_all = data_all[data_all['arrive by ambulance'] == True]

# Add new fields
data_all['count'] = 1
data_all['prestroke mrs 0-2'] = data_all['prior disability'] <= 2
data_all['mrs 5-6'] = data_all['discharge disability'] >= 5
data_all['mrs 0-2'] = data_all['discharge disability'] <= 2


Restrict fields

In [7]:
required_fields: list = [
    'count', 'stroke team', 'age', 'male', 'infarction', 'stroke severity',
    'onset-to-arrival time', 'onset known', 'precise onset known',
    'onset during sleep', 'arrive by ambulance', 'year',
    'use of AF anticoagulants', 'prior disability', 'prestroke mrs 0-2',
    'arrival-to-scan time', 'thrombolysis',
    'scan-to-thrombolysis time', 'death', 'discharge disability', 'mrs 5-6', 'mrs 0-2'
]

data_all = data_all[required_fields]
mask = (data_all['onset known'] == True) & (
    data_all['onset-to-arrival time'] <= 240)
data_all['arrive in 4  hours'] = mask

# Add change in disability
data_all['increased disability due to stroke'] = data_all['discharge disability'] - data_all['prior disability']

In [8]:
summary_stats_dict = {
    'count': 'sum',
    'stroke team': 'none',
    'age': 'mean',
    'male': 'mean',
    'infarction': 'mean',
    'stroke severity': 'mean',
    'onset-to-arrival time': 'median',
    'onset known': 'mean',
    'arrive in 4  hours': 'mean',
    'precise onset known': 'mean',
    'onset during sleep': 'mean',
    'year': 'none',
    'use of AF anticoagulants': 'mean',
    'prior disability': 'mean',
    'prestroke mrs 0-2': 'mean',
    'arrival-to-scan time':     'median',
    'thrombolysis':     'mean',
    'scan-to-thrombolysis time': 'median',
    'death': 'mean',
    'discharge disability': 'mean',
    'increased disability due to stroke': 'mean',
    'mrs 5-6': 'mean',
    'mrs 0-2': 'mean'
}

In [9]:
summary_stats = dict()
for year in [all_years_str] + years_covered:
    if year == all_years_str:
        data = data_all
    else:
        data = data_all[(
            (data_all['year'] == year)
            )]
    results = dict()
    for k, v in summary_stats_dict.items():
        if v == 'mean':
            results[k] = data[k].mean()
        elif v == 'median':
            results[k] = data[k].median()
        elif v == 'sum':
            results[k] = data[k].sum()
    results['stroke_team'] = 'all E+W'
    results['year'] = year
    summary_stats[f'all E+W ({year})'] = results


In [10]:
for hospital in sw:
    for year in [all_years_str] + years_covered:
        if year == all_years_str:
            data = data_all[data_all['stroke team'] == hospital]
        else:
            data = data_all[(
            (data_all['stroke team'] == hospital) &
            (data_all['year'] == year)
            )]
        results = dict()
        for k, v in summary_stats_dict.items():
            if v == 'mean':
                results[k] = data[k].mean()
            elif v == 'median':
                results[k] = data[k].median()
            elif v == 'sum':
                results[k] = data[k].sum()
        results['stroke_team'] = hospital
        results['year'] = year
        summary_stats[f'{hospital} ({year})'] = results


In [11]:
summary_stats_df = pd.DataFrame(summary_stats)
# summary_stats_df.rename(sw_dic, axis=1, inplace = True)

# Only keep hospitals with more than 100 admissions:
mask_count = summary_stats_df.loc['count'] > 100
summary_stats_df = summary_stats_df.T[mask_count].T

In [34]:

key_results['onset known'] = summary_stats_df.loc['onset known']
key_results['arrive in 4 hours'] = summary_stats_df.loc['arrive in 4  hours']
key_results['thrombolysis all arrivals'] = summary_stats_df.loc['thrombolysis']

# Round floats to 3 decimal places:
summary_stats_df = summary_stats_df.applymap(lambda x: round(x, 3) if isinstance(x, (float)) else x)

# Convert "time" rows to float:
time_rows = ['onset-to-arrival time', 'scan-to-thrombolysis time', 'arrival-to-scan time']
# summary_stats_4hr_df.loc[time_rows] = 
for r in time_rows:
    summary_stats_df.loc[r] = pd.to_numeric(summary_stats_df.loc[r])

summary_stats_df

Unnamed: 0,all E+W (2016 to 2021),all E+W (2016),all E+W (2017),all E+W (2018),all E+W (2019),all E+W (2020),all E+W (2021),Northumbria Specialist Emergency Care Hospital HASU (2016 to 2021),Northumbria Specialist Emergency Care Hospital HASU (2016),Northumbria Specialist Emergency Care Hospital HASU (2017),...,Queen's Medical Centre - Nottingham (2016 to 2021),Queen's Medical Centre - Nottingham (2020),Queen's Medical Centre - Nottingham (2021),Invicta Ward Kent and Canterbury Hospital (2016 to 2021),Invicta Ward Kent and Canterbury Hospital (2020),Invicta Ward Kent and Canterbury Hospital (2021),University Hospitals Dorset Stroke Service (2016 to 2021),University Hospitals Dorset Stroke Service (2021),Grange University Hospital (2016 to 2021),Grange University Hospital (2021)
count,283946,45925,47145,46137,46793,47224,50722,2056,389,309,...,1222,254,956,991,362,629,520,421,234,194
age,75.571,75.798,75.856,75.719,75.803,75.159,75.134,74.229,74.994,74.636,...,73.854,74.488,73.86,75.356,76.174,74.885,75.712,75.564,73.739,73.737
male,0.515,0.505,0.505,0.515,0.514,0.52,0.528,0.537,0.563,0.579,...,0.488,0.48,0.485,0.526,0.514,0.533,0.554,0.539,0.526,0.5
infarction,0.865,0.865,0.867,0.866,0.862,0.866,0.865,0.857,0.843,0.883,...,0.856,0.894,0.848,0.902,0.945,0.878,0.875,0.862,0.803,0.804
stroke severity,8.086,8.104,8.26,8.248,8.177,7.926,7.827,7.453,6.933,7.142,...,9.014,9.283,8.924,7.918,7.608,8.097,6.74,6.708,7.235,7.247
onset-to-arrival time,331.0,308.0,318.0,316.0,319.0,350.0,370.0,270.0,273.0,297.0,...,460.0,214.0,561.0,220.0,285.0,193.0,257.0,255.0,482.5,488.0
onset known,0.705,0.695,0.695,0.707,0.719,0.707,0.708,0.807,0.799,0.786,...,0.574,0.709,0.535,0.776,0.749,0.792,0.8,0.817,0.688,0.696
arrive in 4 hours,0.437,0.451,0.447,0.446,0.442,0.425,0.412,0.473,0.473,0.466,...,0.374,0.52,0.329,0.523,0.461,0.558,0.471,0.475,0.325,0.345
precise onset known,0.361,0.366,0.367,0.369,0.363,0.354,0.348,0.459,0.53,0.427,...,0.017,0.016,0.007,0.103,0.039,0.14,0.277,0.252,0.423,0.418
onset during sleep,0.137,0.133,0.14,0.137,0.139,0.139,0.133,0.193,0.185,0.217,...,0.005,0.004,0.004,0.083,0.08,0.084,0.138,0.133,0.184,0.18


In [35]:
summary_stats_df.to_csv('summary_stats.csv')

## Limit analysis to arrival in 4 hours

In [14]:
mask = data_all['arrive in 4  hours'] == True
data_4hr = data_all[mask]

Add onset to thrombolysis



In [15]:
data_4hr['onset to thrombolysis'] = (
    data_4hr['onset-to-arrival time'] + 
    data_4hr['arrival-to-scan time'] + 
    data_4hr['scan-to-thrombolysis time'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_4hr['onset to thrombolysis'] = (


In [24]:
# Repeat analysis for patients arriving in 4 hours
summary_stats_4hr = dict()
for year in [all_years_str] + years_covered:
    if year == all_years_str:
        data = data_4hr
    else:
        data = data_4hr[(
            (data_4hr['year'] == year)
            )]
    results = dict()
    for k, v in summary_stats_dict.items():
        if v == 'mean':
            results[k] = data[k].mean()
        elif v == 'median':
            results[k] = data[k].median()
        elif v == 'sum':
            results[k] = data[k].sum()
    results['stroke_team'] = 'all E+W'
    results['year'] = year
    summary_stats_4hr[f'all E+W ({year})'] = results

for hospital in sw:
    for year in [all_years_str] + years_covered:
        if year == all_years_str:
            data = data_4hr[data_4hr['stroke team'] == hospital]
        else:
            data = data_4hr[(
                (data_4hr['stroke team'] == hospital) &
                (data_4hr['year'] == year)
                )]
        # data = data_4hr[data_4hr['stroke team'] == hospital]
        results = dict()
        for k, v in summary_stats_dict.items():
            if v == 'mean':
                results[k] = data[k].mean()
            elif v == 'median':
                results[k] = data[k].median()
            elif v == 'sum':
                results[k] = data[k].sum()
        results['stroke_team'] = hospital
        results['year'] = year
        summary_stats_4hr[f'{hospital} ({year})'] = results

summary_stats_4hr_df = pd.DataFrame(summary_stats_4hr)

# Only keep hospitals with more than 100 admissions
# in the full data (not the 4hr data) - same mask as earlier.
summary_stats_4hr_df = summary_stats_4hr_df.T[mask_count].T

# Round floats to 3 decimal places:
summary_stats_4hr_df = summary_stats_4hr_df.applymap(lambda x: round(x, 3) if isinstance(x, (float)) else x)


AttributeError: 'DataFrame' object has no attribute 'to_numeric'

In [32]:
# Convert "time" rows to float:
time_rows = ['onset-to-arrival time', 'scan-to-thrombolysis time', 'arrival-to-scan time']
# summary_stats_4hr_df.loc[time_rows] = 
for r in time_rows:
    summary_stats_4hr_df.loc[r] = pd.to_numeric(summary_stats_4hr_df.loc[r])

In [33]:
summary_stats_4hr_df.to_csv('summary_stats_4hr.csv')

In [18]:
rows = [
    'age', 'infarction', 'precise onset known', 'onset during sleep',
    'use of AF anticoagulants', 'prior disability', 'prestroke mrs 0-2', 
    'stroke severity',
    'onset-to-arrival time', 'arrival-to-scan time', 'thrombolysis',
    'scan-to-thrombolysis time', 'discharge disability', 'death',
    'increased disability due to stroke', 'mrs 5-6', 'mrs 0-2'
]

for row in rows:
    key_results[row] = summary_stats_4hr_df.loc[row]


In [19]:
key_results = key_results.round(3).T

In [20]:
key_results

Unnamed: 0,all E+W (2016 to 2021),all E+W (2016),all E+W (2017),all E+W (2018),all E+W (2019),all E+W (2020),all E+W (2021),Northumbria Specialist Emergency Care Hospital HASU (2016 to 2021),Northumbria Specialist Emergency Care Hospital HASU (2016),Northumbria Specialist Emergency Care Hospital HASU (2017),...,Queen's Medical Centre - Nottingham (2016 to 2021),Queen's Medical Centre - Nottingham (2020),Queen's Medical Centre - Nottingham (2021),Invicta Ward Kent and Canterbury Hospital (2016 to 2021),Invicta Ward Kent and Canterbury Hospital (2020),Invicta Ward Kent and Canterbury Hospital (2021),University Hospitals Dorset Stroke Service (2016 to 2021),University Hospitals Dorset Stroke Service (2021),Grange University Hospital (2016 to 2021),Grange University Hospital (2021)
onset known,0.705057,0.694567,0.694941,0.706786,0.718975,0.70714,0.707602,0.807393,0.799486,0.786408,...,0.574468,0.708661,0.534519,0.775984,0.748619,0.791733,0.8,0.817102,0.688034,0.695876
arrive in 4 hours,0.436868,0.450822,0.447343,0.446063,0.442075,0.425335,0.41207,0.473249,0.473008,0.466019,...,0.373977,0.519685,0.329498,0.522704,0.461326,0.558029,0.471154,0.475059,0.324786,0.345361
thrombolysis all arrivals,0.134022,0.1357,0.136727,0.138024,0.139743,0.128621,0.126099,0.14251,0.14653,0.113269,...,0.157119,0.200787,0.141213,0.173562,0.143646,0.190779,0.132692,0.118765,0.145299,0.159794
age,75.125,75.416,75.288,75.289,75.27,74.658,74.815,73.702,74.565,74.41,...,73.561,72.955,74.214,75.405,75.494,75.363,75.908,75.95,72.5,72.873
infarction,0.849,0.85,0.853,0.85,0.845,0.847,0.847,0.841,0.799,0.847,...,0.873,0.909,0.863,0.902,0.934,0.886,0.849,0.835,0.776,0.761
precise onset known,0.63,0.63,0.641,0.638,0.632,0.621,0.615,0.737,0.766,0.653,...,0.044,0.03,0.022,0.162,0.084,0.199,0.498,0.46,0.763,0.761
onset during sleep,0.047,0.044,0.046,0.048,0.046,0.05,0.045,0.055,0.071,0.097,...,0.004,0.0,0.006,0.039,0.06,0.028,0.073,0.08,0.0,0.0
use of AF anticoagulants,0.141,0.105,0.116,0.147,0.156,0.158,0.164,0.11,0.071,0.09,...,0.114,0.106,0.121,0.104,0.144,0.085,0.139,0.12,0.053,0.06
prior disability,1.1,1.093,1.09,1.129,1.124,1.068,1.094,1.121,1.12,1.076,...,0.512,0.591,0.495,0.683,0.563,0.741,1.016,0.99,0.487,0.448
prestroke mrs 0-2,0.788,0.784,0.791,0.782,0.782,0.798,0.795,0.794,0.81,0.833,...,0.906,0.894,0.908,0.905,0.922,0.897,0.792,0.8,0.934,0.94
