# Predicting stroke admissions by LSOA

In [1]:
import pandas as pd

In [2]:
# Load the collated LSOA-level data and the LSOA -> MSOA lookup table.
collated = pd.read_csv('./data/collated_data.csv', low_memory=False)
msoa = pd.read_csv('./data/lsoa_to_msoa.csv')

# Attach the lookup columns to each LSOA row by matching on the LSOA name.
# validate='many_to_one' makes pandas raise a MergeError if the lookup holds
# duplicate 'lsoa11nm' keys; without it such duplicates would silently
# duplicate LSOA rows and inflate any sums computed from `collated`.
collated = collated.merge(
    msoa,
    left_on='LSOA',
    right_on='lsoa11nm',
    how='left',
    validate='many_to_one',
)

In [3]:
# Preview the first five rows of the merged LSOA table.
collated.head(n=5)

Unnamed: 0,LSOA,admissions,closest_ivt_unit,closest_ivt_unit_time,closest_mt_unit,closest_mt_unit_time,mt_transfer_time,ivt_rate,IMD2019Score,Local Authority District name (2019),...,age band males 80,age band males 85,age band males 90,age band males 95,lsoa11cd,msoa11cd,ladcd,lsoa11nm,msoa11nm,country
0,Welwyn Hatfield 010F,0.666667,SG14AB,18.7,NW12BU,36.9,46.6,6.8,15.616,Welwyn Hatfield,...,15.0,4.0,3.0,0.0,E01033311,E02004989,E07000241,Welwyn Hatfield 010F,Welwyn Hatfield 010,E
1,Welwyn Hatfield 012A,4.0,SG14AB,19.8,NW12BU,36.9,46.6,6.8,33.313,Welwyn Hatfield,...,15.0,9.0,7.0,0.0,E01023920,E02004991,E07000241,Welwyn Hatfield 012A,Welwyn Hatfield 012,E
2,Welwyn Hatfield 002F,2.0,SG14AB,18.7,NW12BU,38.0,46.6,6.8,7.043,Welwyn Hatfield,...,5.0,8.0,5.0,0.0,E01033313,E02004981,E07000241,Welwyn Hatfield 002F,Welwyn Hatfield 002,E
3,Welwyn Hatfield 002E,0.666667,SG14AB,18.7,NW12BU,36.9,46.6,6.8,8.249,Welwyn Hatfield,...,18.0,5.0,1.0,0.0,E01033310,E02004981,E07000241,Welwyn Hatfield 002E,Welwyn Hatfield 002,E
4,Welwyn Hatfield 010A,3.333333,SG14AB,18.7,NW12BU,36.9,46.6,6.8,19.271,Welwyn Hatfield,...,12.0,9.0,2.0,0.0,E01023929,E02004989,E07000241,Welwyn Hatfield 010A,Welwyn Hatfield 010,E


In [4]:
# Preview the first five rows of the LSOA -> MSOA lookup table.
msoa.head(n=5)

Unnamed: 0,lsoa11cd,msoa11cd,ladcd,lsoa11nm,msoa11nm,country
0,E01000001,E02000001,E09000001,City of London 001A,City of London 001,E
1,E01000002,E02000001,E09000001,City of London 001B,City of London 001,E
2,E01000003,E02000001,E09000001,City of London 001C,City of London 001,E
3,E01000005,E02000001,E09000001,City of London 001E,City of London 001,E
4,E01000006,E02000017,E09000002,Barking and Dagenham 016A,Barking and Dagenham 016,E


## Add population totals for grouped age bands (0-64, 65-79, 80+)

In [5]:
# Columns that make up the '0-64' group.
# NOTE(review): presumably each band is labelled by its lower age bound — confirm.
cols = ['age band 0', 'age band 5', 'age band 10', 'age band 15', 'age band 20',
    'age band 25', 'age band 30', 'age band 35', 'age band 40', 'age band 45',
    'age band 50', 'age band 55', 'age band 60']

# Vectorised row-wise sum; replaces a per-row `apply` with a lambda, which
# produced the same totals (NaNs skipped) via a much slower Python-level loop.
collated['0-64'] = collated[cols].sum(axis=1)

In [6]:
# Columns that make up the '65-79' group.
cols = ['age band 65', 'age band 70', 'age band 75']

# Vectorised row-wise sum; replaces a per-row `apply` with a lambda, which
# produced the same totals (NaNs skipped) via a much slower Python-level loop.
collated['65-79'] = collated[cols].sum(axis=1)

In [7]:
# Columns that make up the '80+' group.
cols = ['age band 80', 'age band 85', 'age band 90', 'age band 95']

# Vectorised row-wise sum; replaces a per-row `apply` with a lambda, which
# produced the same totals (NaNs skipped) via a much slower Python-level loop.
collated['80+'] = collated[cols].sum(axis=1)

## Aggregate LSOA data to MSOA level

In [8]:
# Total admissions per MSOA: group the LSOA rows by MSOA name and sum.
msoa_data = collated.groupby('msoa11nm')[['admissions']].sum()

In [9]:
# Mean IMD 2019 score per MSOA. This is a plain (unweighted) mean over the
# LSOA rows in each MSOA.
imd = collated.groupby('msoa11nm')[['IMD2019Score']].mean()

# Index-on-index left join keeps every MSOA already present in msoa_data.
msoa_data = msoa_data.merge(imd, how='left', left_index=True, right_index=True)

In [10]:
# Total population per MSOA: sum 'All persons' over the LSOA rows in each MSOA.
persons = collated.groupby('msoa11nm')[['All persons']].sum()

# Index-on-index left join keeps every MSOA already present in msoa_data.
msoa_data = msoa_data.merge(persons, how='left', left_index=True, right_index=True)


In [11]:
# Population per MSOA in each grouped age band ('0-64', '65-79', '80+').
# Note: this reuses the name `persons`, replacing its previous value.
persons = collated.groupby('msoa11nm')[['0-64', '65-79', '80+']].sum()

# Index-on-index left join keeps every MSOA already present in msoa_data.
msoa_data = msoa_data.merge(persons, how='left', left_index=True, right_index=True)

In [12]:
# Country code per MSOA: take the first value found within each MSOA group.
country = collated.groupby('msoa11nm')[['country']].first()

# Index-on-index left join keeps every MSOA already present in msoa_data.
msoa_data = msoa_data.merge(country, how='left', left_index=True, right_index=True)

In [13]:
# Preview the first five rows of the MSOA-level table.
msoa_data.head(n=5)

Unnamed: 0_level_0,admissions,IMD2019Score,All persons,0-64,65-79,80+,country
msoa11nm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Adur 001,14.333333,16.924833,8815.0,6905.0,1339.0,571.0,E
Adur 002,7.333333,6.4704,7263.0,5431.0,1345.0,487.0,E
Adur 003,9.333333,13.7334,7354.0,5745.0,1157.0,452.0,E
Adur 004,21.0,26.199857,10582.0,8583.0,1371.0,628.0,E
Adur 005,13.666667,11.7948,9059.0,6995.0,1479.0,585.0,E


## Save

In [14]:
# Write the MSOA-level table to CSV; the index (MSOA name) is written as a
# column headed 'MSOA'.
output_path = './data/msoa_collated.csv'
msoa_data.to_csv(path_or_buf=output_path, index_label='MSOA')