# Collate demographic data for each LSOA

This notebook collates admissions, travel-time and demographic data for each LSOA and saves the result as `collated_data.csv` in the `data` folder.

In [1]:
import numpy as np
import pandas as pd

## Unzip required travel matrix file

In [2]:
import zipfile

# Unpack the zipped travel time matrix into the data folder so that it can be
# read with pandas further down the notebook.
with zipfile.ZipFile("./data/lsoa_travel_time_matrix_calibrated.zip", mode="r") as archive:
    archive.extractall(path="./data/")

## Import admissions data

In [3]:
# Read admissions per LSOA (indexed by the 'area' column, which holds the LSOA
# name) and normalise the column name to lower case.
data = (
    pd.read_csv('./data/admissions_2017-2019.csv', index_col='area')
    .rename(columns={'Admissions': 'admissions'})
)

data.head(2)

Unnamed: 0_level_0,admissions
area,Unnamed: 1_level_1
Welwyn Hatfield 010F,0.666667
Welwyn Hatfield 012A,4.0


## Import units

In [4]:
# Load the table of stroke units. The 'Postcode', 'Use_IVT', 'Use_MT' and
# 'ivt_rate' columns are the ones used later in this notebook.
units = pd.read_csv('./data/stroke_hospitals_2022.csv')

units.head(2)

Unnamed: 0,Postcode,Hospital_name,Use_IVT,Use_MT,Country,Strategic Clinical Network,Health Board / Trust,Stroke Team,SSNAP name,Admissions 21/22,Thrombolysis,ivt_rate,Easting,Northing,Neuroscience,30 England Thrombectomy Example,hospital_city,Notes
0,RM70AG,RM70AG,1,1,England,London SCN,Barking; Havering and Redbridge University Hos...,"Queen's Hospital, Romford",Queens Hospital Romford HASU,981,117,11.9,551118,187780,1,0,Romford,
1,E11BB,E11BB,1,1,England,London SCN,Barts Health NHS Trust,The Royal London Hospital,Royal London Hospital HASU,861,115,13.4,534829,181798,1,1,Royal London,


Get postcodes of units in use for IVT (thrombolysis).

In [5]:
# Postcodes of every unit flagged as providing thrombolysis (Use_IVT == 1)
uses_ivt = units['Use_IVT'] == 1
ivt_units = units.loc[uses_ivt, 'Postcode'].tolist()
ivt_units[:5]

['RM70AG', 'E11BB', 'SW66SX', 'SE59RW', 'BR68ND']

Get postcodes of units providing thrombectomy.

In [6]:
# Postcodes of every unit flagged as providing thrombectomy (Use_MT == 1)
uses_mt = units['Use_MT'] == 1
mt_units = units.loc[uses_mt, 'Postcode'].tolist()
mt_units[:5]

['RM70AG', 'E11BB', 'SW66SX', 'SE59RW', 'SW170QT']

## Import travel time to stroke units

In [7]:
# Load the calibrated travel time matrix: one row per LSOA (index column
# 'LSOA'); the columns are selected by unit postcode in the cells below.
# Presumably this is the file extracted from the zip archive above.
travel_time = pd.read_csv('./data/lsoa_travel_time_matrix_calibrated.csv', index_col='LSOA')

### Add travel times to thrombolysis units 
Limit to units that use IVT

In [8]:
# Keep only the travel time columns belonging to thrombolysis units
travel_time_ivt = travel_time.loc[:, ivt_units]
travel_time_ivt.head(2)

Unnamed: 0_level_0,RM70AG,E11BB,SW66SX,SE59RW,BR68ND,HA13UJ,SW170QT,NW12BU,DE223NE,NN15BD,...,LL137TD,LL572PW,CF144XW,CF479DT,CF311RQ,SY231ER,SA148QF,SA312AF,SA612PZ,SA66NL
LSOA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
City of London 001A,34.3,11.7,26.7,18.1,39.6,32.1,27.8,13.8,142.9,82.6,...,202.0,276.2,165.4,190.2,180.5,256.8,210.6,227.8,259.0,203.1
City of London 001B,33.2,10.6,28.9,18.1,38.6,33.2,26.7,13.8,142.9,82.6,...,203.1,276.2,166.5,191.2,181.6,256.8,212.7,228.9,261.1,204.1


Get closest unit and time  for IVT (convert series to dataframes)

In [9]:
# For each LSOA (row): the postcode of the IVT unit with the smallest travel
# time, and that smallest travel time, each as a one-column dataframe.
closest_ivt_unit = travel_time_ivt.idxmin(axis=1).to_frame(name='closest_ivt_unit')
closest_ivt_unit_time = travel_time_ivt.min(axis=1).to_frame(name='closest_ivt_unit_time')

Merge into admissions

In [10]:
# Left-join on the LSOA index so every admissions row is kept
data = (
    data
    .merge(closest_ivt_unit, left_index=True, right_index=True, how='left')
    .merge(closest_ivt_unit_time, left_index=True, right_index=True, how='left')
)
data.head(2)

Unnamed: 0_level_0,admissions,closest_ivt_unit,closest_ivt_unit_time
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Welwyn Hatfield 010F,0.666667,SG14AB,19.2
Welwyn Hatfield 012A,4.0,SG14AB,20.3


### Add travel times to thrombectomy units 
Limit to units that use MT

In [11]:
# Keep only the travel time columns belonging to thrombectomy units
travel_time_mt = travel_time.loc[:, mt_units]
travel_time_mt.head(2)

Unnamed: 0_level_0,RM70AG,E11BB,SW66SX,SE59RW,SW170QT,NW12BU,NG72UH,CB20QQ,B152TH,CV22DX,...,NE14LP,HU32JZ,LS13EX,S102JF,BN25BE,BS105NB,PL68DH,OX39DU,SO166YD,CF144XW
LSOA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
City of London 001A,34.3,11.7,26.7,18.1,27.8,13.8,139.6,70.8,137.5,109.5,...,283.7,218.1,202.0,175.1,86.9,135.3,250.4,76.2,98.8,165.4
City of London 001B,33.2,10.6,28.9,18.1,26.7,13.8,139.6,69.7,137.5,109.5,...,283.7,218.1,202.0,175.1,86.9,136.4,251.4,76.2,100.9,166.5


Get closest unit and time  for MT (convert series to dataframes)

In [12]:
# For each LSOA (row): the postcode of the MT unit with the smallest travel
# time, and that smallest travel time, each as a one-column dataframe.
closest_mt_unit = travel_time_mt.idxmin(axis=1).to_frame(name='closest_mt_unit')
closest_mt_unit_time = travel_time_mt.min(axis=1).to_frame(name='closest_mt_unit_time')

In [13]:
# Left-join on the LSOA index so every admissions row is kept
data = (
    data
    .merge(closest_mt_unit, left_index=True, right_index=True, how='left')
    .merge(closest_mt_unit_time, left_index=True, right_index=True, how='left')
)
data.head(2)

Unnamed: 0_level_0,admissions,closest_ivt_unit,closest_ivt_unit_time,closest_mt_unit,closest_mt_unit_time
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Welwyn Hatfield 010F,0.666667,SG14AB,19.2,NW12BU,37.5
Welwyn Hatfield 012A,4.0,SG14AB,20.3,NW12BU,37.5


Add travel time between the closest IVT unit and the closest MT unit.

In [14]:
# Load the unit-to-unit travel time matrix. Rows are indexed by the
# 'from_postcode' column; columns are looked up by destination postcode below.
inter_hospital_times = pd.read_csv('./data/inter_hospital_time_calibrated.csv', index_col='from_postcode')

In [15]:
# Look up the transfer time from each LSOA's closest IVT unit to its closest
# MT unit. Iterating over the two columns directly avoids building a Series
# for every row (as iterrows does), and a single .loc[row, column] lookup
# replaces the chained .loc[row][column] indexing. A unit postcode that is
# missing from inter_hospital_times still raises KeyError, as before.
transfer_times = [
    inter_hospital_times.loc[ivt_unit, mt_unit]
    for ivt_unit, mt_unit in zip(
        data['closest_ivt_unit'], data['closest_mt_unit'])
]

In [16]:
# Attach the looked-up transfer times (one per row, in row order)
data = data.assign(mt_transfer_time=transfer_times)
data.head(2)

Unnamed: 0_level_0,admissions,closest_ivt_unit,closest_ivt_unit_time,closest_mt_unit,closest_mt_unit_time,mt_transfer_time
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Welwyn Hatfield 010F,0.666667,SG14AB,19.2,NW12BU,37.5,47.2
Welwyn Hatfield 012A,4.0,SG14AB,20.3,NW12BU,37.5,47.2


In [17]:
# Keep a reference to the LSOA index so it can be re-applied with set_index
# after later merges (a column-keyed merge does not keep the index).
lsoa_index = data.index
lsoa_index

Index(['Welwyn Hatfield 010F', 'Welwyn Hatfield 012A', 'Welwyn Hatfield 002F',
       'Welwyn Hatfield 002E', 'Welwyn Hatfield 010A', 'Welwyn Hatfield 010B',
       'Welwyn Hatfield 012B', 'Welwyn Hatfield 011D', 'Welwyn Hatfield 011A',
       'Welwyn Hatfield 012C',
       ...
       'Cardiff 005F', 'Cardiff 049D', 'Cardiff 049E', 'Cardiff 049F',
       'Cardiff 005G', 'Cardiff 006F', 'Swansea 025F', 'Swansea 023E',
       'Swansea 025G', 'Swansea 025H'],
      dtype='object', name='area', length=34752)

## Add thrombolysis rate of closest IVT unit

In [19]:
# Thrombolysis rate of each IVT unit, keyed by unit postcode
ivt_rate = units.loc[units['Use_IVT'] == 1, ['Postcode', 'ivt_rate']]

# Merging on a column (not the index) discards the LSOA index, so drop the
# helper 'Postcode' column and restore the saved index afterwards.
data = (
    data
    .merge(ivt_rate, left_on='closest_ivt_unit', right_on='Postcode', how='left')
    .drop('Postcode', axis=1)
    .set_index(lsoa_index)
)

In [20]:
# Preview the first two rows of the collated table
data.head(2)

Unnamed: 0_level_0,admissions,closest_ivt_unit,closest_ivt_unit_time,closest_mt_unit,closest_mt_unit_time,mt_transfer_time,ivt_rate
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Welwyn Hatfield 010F,0.666667,SG14AB,19.2,NW12BU,37.5,47.2,6.8
Welwyn Hatfield 012A,4.0,SG14AB,20.3,NW12BU,37.5,47.2,6.8


## Import Index of Multiple Deprivation (IMD) demographics

# Wales
# Read the Welsh IMD scores indexed by 'LSOA_Code' and keep only the
# 'WIMD2019_Score' column.
# NOTE(review): neither imd_wales nor imd_england is used anywhere else in the
# visible notebook, and this snippet has no execution count - presumably it is
# not run; confirm whether it should be merged into `data` or removed.
imd_wales = pd.read_csv(
    './data/demographic/Welsh IMD 2019/Welsh IMD 2019/WIMD2019_Scores.csv',
    index_col='LSOA_Code')
imd_wales = imd_wales[['WIMD2019_Score']]

# England
# NOTE(review): this selects 'WIMD2019_Score' (the column name used for the
# Welsh file above) from the English IMD file - looks like a copy-paste from
# the Wales block; confirm the score column name in IMD_2019.csv.
imd_england = pd.read_csv(
    './data/demographic/English IMD 2019/English IMD 2019/IMD_2019.csv',
    index_col='LSOA_Code')
imd_england = imd_england[['WIMD2019_Score']]


## Import deprivation demographics

In [21]:
# Read the IoD 2019 scores indexed by LSOA name, discarding the code columns
# that are not needed in the collated output.
deprivation = pd.read_csv(
    './data/demographic/File_7a_-_All_IoD2019_Scores__Ranks__Deciles_and_Population_Denominators_3.csv',
    index_col='LSOA name (2011)',
).drop(columns=['LSOA Code (2011)', 'Local Authority District code (2019)'])

In [22]:
# Preview the first two rows of the deprivation table
deprivation.head(2)

Unnamed: 0_level_0,Local Authority District name (2019),Income Domain Score,Income Domain Rank (where 1 is most deprived),IDACI Score,IDACI Rank (where 1 is most deprived),IDAOPI Score,IDAOPI Rank (where 1 is most deprived)
LSOA name (2011),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
City of London 001A,City of London,0.007,34740,0.006,34715,0.012,34729
City of London 001B,City of London,0.034,31762,0.037,31529,0.03,33839


In [23]:
# Left-join deprivation measures on the LSOA name index, then re-apply the
# saved LSOA index.
data = (
    data
    .merge(deprivation, left_index=True, right_index=True, how='left')
    .set_index(lsoa_index)
)

data.head(2)

Unnamed: 0_level_0,admissions,closest_ivt_unit,closest_ivt_unit_time,closest_mt_unit,closest_mt_unit_time,mt_transfer_time,ivt_rate,Local Authority District name (2019),Income Domain Score,Income Domain Rank (where 1 is most deprived),IDACI Score,IDACI Rank (where 1 is most deprived),IDAOPI Score,IDAOPI Rank (where 1 is most deprived)
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Welwyn Hatfield 010F,0.666667,SG14AB,19.2,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.104,17062,0.125,17700,0.186,11760
Welwyn Hatfield 012A,4.0,SG14AB,20.3,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.176,9058,0.27,6329,0.269,6337


## Import ethnicity

In [24]:
ethnicity = pd.read_csv('./data/demographic/ethnicity.csv', index_col='geography')

# Keep only the whole-population columns (no age/sex breakdown)
cols_to_include = [
    col for col in ethnicity.columns
    if 'Sex: All persons; Age: All categories' in col]

# Drop the first 44 characters of each selected column name (presumably the
# shared "Sex: ...; Age: ...; " prefix) so the names start at the ethnic group.
extract = ethnicity[cols_to_include].rename(columns=lambda name: name[44:])

# Left-join on the LSOA name index, then re-apply the saved LSOA index
data = (
    data
    .merge(extract, left_index=True, right_index=True, how='left')
    .set_index(lsoa_index)
)

data.head(2)

Unnamed: 0_level_0,admissions,closest_ivt_unit,closest_ivt_unit_time,closest_mt_unit,closest_mt_unit_time,mt_transfer_time,ivt_rate,Local Authority District name (2019),Income Domain Score,Income Domain Rank (where 1 is most deprived),...,Ethnic Group: Asian/Asian British: Bangladeshi; measures: Value,Ethnic Group: Asian/Asian British: Chinese; measures: Value,Ethnic Group: Asian/Asian British: Other Asian; measures: Value,Ethnic Group: Black/African/Caribbean/Black British: Total; measures: Value,Ethnic Group: Black/African/Caribbean/Black British: African; measures: Value,Ethnic Group: Black/African/Caribbean/Black British: Caribbean; measures: Value,Ethnic Group: Black/African/Caribbean/Black British: Other Black; measures: Value,Ethnic Group: Other ethnic group: Total; measures: Value,Ethnic Group: Other ethnic group: Arab; measures: Value,Ethnic Group: Other ethnic group: Any other ethnic group; measures: Value
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Welwyn Hatfield 010F,0.666667,SG14AB,19.2,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.104,17062,...,14,23,35,105,73,30,2,14,6,8
Welwyn Hatfield 012A,4.0,SG14AB,20.3,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.176,9058,...,28,37,23,151,127,11,13,25,11,14


## Import health

In [25]:
health = pd.read_csv('./data/demographic/general_health.csv', index_col='geography')

# Keep only the general health columns for all people (no age/sex breakdown)
cols_to_include = [
    col for col in health.columns
    if 'Sex: All persons; Age: All categories' in col]

# Drop the first 44 characters of each selected column name (presumably the
# shared "Sex: ...; Age: ...; " prefix) so the names start at the health field.
extract = health[cols_to_include].rename(columns=lambda name: name[44:])

# Left-join on the LSOA name index, then re-apply the saved LSOA index
data = (
    data
    .merge(extract, left_index=True, right_index=True, how='left')
    .set_index(lsoa_index)
)

data.head(2)

Unnamed: 0_level_0,admissions,closest_ivt_unit,closest_ivt_unit_time,closest_mt_unit,closest_mt_unit_time,mt_transfer_time,ivt_rate,Local Authority District name (2019),Income Domain Score,Income Domain Rank (where 1 is most deprived),...,General Health: Fair health; Ethnic Group: Other ethnic group; measures: Value,General Health: Bad or very bad health; Ethnic Group: All categories: Ethnic group; measures: Value,General Health: Bad or very bad health; Ethnic Group: White: Total; measures: Value,General Health: Bad or very bad health; Ethnic Group: White: English/Welsh/Scottish/Northern Irish/British; measures: Value,General Health: Bad or very bad health; Ethnic Group: White: Irish; measures: Value,General Health: Bad or very bad health; Ethnic Group: White: Other White; measures: Value,General Health: Bad or very bad health; Ethnic Group: Mixed/multiple ethnic group; measures: Value,General Health: Bad or very bad health; Ethnic Group: Asian/Asian British; measures: Value,General Health: Bad or very bad health; Ethnic Group: Black/African/Caribbean/Black British; measures: Value,General Health: Bad or very bad health; Ethnic Group: Other ethnic group; measures: Value
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Welwyn Hatfield 010F,0.666667,SG14AB,19.2,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.104,17062,...,1,64,55,51,1,3,3,2,4,0
Welwyn Hatfield 012A,4.0,SG14AB,20.3,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.176,9058,...,1,103,98,87,5,6,1,1,2,1


## Import rural vs urban

In [26]:
# Read the rural/urban classification indexed by LSOA name, dropping the code
# and ID columns so only the remaining descriptive column(s) are merged.
rural_urban = pd.read_csv(
    './data/demographic/Rural_Urban_Classification_2011_of_Lower_Layer_Super_Output_Areas_in_England_and_Wales.csv',
    index_col='LSOA11NM',
).drop(columns=['LSOA11CD', 'RUC11CD', 'FID'])

# Left-join on the LSOA name index, then re-apply the saved LSOA index
data = (
    data
    .merge(rural_urban, left_index=True, right_index=True, how='left')
    .set_index(lsoa_index)
)

data.head(2)

Unnamed: 0_level_0,admissions,closest_ivt_unit,closest_ivt_unit_time,closest_mt_unit,closest_mt_unit_time,mt_transfer_time,ivt_rate,Local Authority District name (2019),Income Domain Score,Income Domain Rank (where 1 is most deprived),...,General Health: Bad or very bad health; Ethnic Group: All categories: Ethnic group; measures: Value,General Health: Bad or very bad health; Ethnic Group: White: Total; measures: Value,General Health: Bad or very bad health; Ethnic Group: White: English/Welsh/Scottish/Northern Irish/British; measures: Value,General Health: Bad or very bad health; Ethnic Group: White: Irish; measures: Value,General Health: Bad or very bad health; Ethnic Group: White: Other White; measures: Value,General Health: Bad or very bad health; Ethnic Group: Mixed/multiple ethnic group; measures: Value,General Health: Bad or very bad health; Ethnic Group: Asian/Asian British; measures: Value,General Health: Bad or very bad health; Ethnic Group: Black/African/Caribbean/Black British; measures: Value,General Health: Bad or very bad health; Ethnic Group: Other ethnic group; measures: Value,RUC11
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Welwyn Hatfield 010F,0.666667,SG14AB,19.2,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.104,17062,...,64,55,51,1,3,3,2,4,0,Urban city and town
Welwyn Hatfield 012A,4.0,SG14AB,20.3,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.176,9058,...,103,98,87,5,6,1,1,2,1,Urban city and town


## Import age demographics

In [27]:
ages = pd.read_csv('./data/demographic/mid_2018_persons.csv', index_col='LSOA')

# Total population per LSOA (aligned to `data` on the index)
data['All persons'] = ages['All Ages']

# Remove the total, and relabel the open-ended '90+' column as '91' so that
# every remaining column name can be parsed as an integer age.
ages = ages.drop(columns='All Ages').rename(columns={'90+': '91'})

# Sum the age columns into 5-year bands labelled by their lower bound
# (0, 5, ..., 95).
# NOTE(review): with '90+' mapped to 91 it looks like no column can fall into
# band 95, so 'age band 95' would always be zero - confirm this is intended.
age_bands = pd.DataFrame()
for band in np.arange(0, 96, 5):
    cols_to_get = [
        field for field in ages.columns
        if int(int(field) / 5) * 5 == band]
    age_bands[f'age band {band}'] = ages[cols_to_get].sum(axis=1)

# Left-join on the LSOA name index, then re-apply the saved LSOA index
data = (
    data
    .merge(age_bands, left_index=True, right_index=True, how='left')
    .set_index(lsoa_index)
)
data.head(2)

Unnamed: 0_level_0,admissions,closest_ivt_unit,closest_ivt_unit_time,closest_mt_unit,closest_mt_unit_time,mt_transfer_time,ivt_rate,Local Authority District name (2019),Income Domain Score,Income Domain Rank (where 1 is most deprived),...,age band 50,age band 55,age band 60,age band 65,age band 70,age band 75,age band 80,age band 85,age band 90,age band 95
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Welwyn Hatfield 010F,0.666667,SG14AB,19.2,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.104,17062,...,106.0,109.0,92.0,64.0,62.0,48.0,20.0,9.0,7.0,0.0
Welwyn Hatfield 012A,4.0,SG14AB,20.3,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.176,9058,...,122.0,105.0,79.0,51.0,55.0,41.0,30.0,26.0,19.0,0.0


In [28]:
ages = pd.read_csv('./data/demographic/mid_2018_females.csv', index_col='LSOA')

# Total female population per LSOA (aligned to `data` on the index)
data['All females'] = ages['All Ages']

# Remove the total, and relabel the open-ended '90+' column as '91' so that
# every remaining column name can be parsed as an integer age.
ages = ages.drop(columns='All Ages').rename(columns={'90+': '91'})

# Sum the age columns into 5-year bands labelled by their lower bound
# (0, 5, ..., 95).
# NOTE(review): with '90+' mapped to 91 it looks like no column can fall into
# band 95, so 'age band females 95' would always be zero - confirm intended.
age_bands = pd.DataFrame()
for band in np.arange(0, 96, 5):
    cols_to_get = [
        field for field in ages.columns
        if int(int(field) / 5) * 5 == band]
    age_bands[f'age band females {band}'] = ages[cols_to_get].sum(axis=1)

# Left-join on the LSOA name index, then re-apply the saved LSOA index
data = (
    data
    .merge(age_bands, left_index=True, right_index=True, how='left')
    .set_index(lsoa_index)
)
data.head(2)

Unnamed: 0_level_0,admissions,closest_ivt_unit,closest_ivt_unit_time,closest_mt_unit,closest_mt_unit_time,mt_transfer_time,ivt_rate,Local Authority District name (2019),Income Domain Score,Income Domain Rank (where 1 is most deprived),...,age band females 50,age band females 55,age band females 60,age band females 65,age band females 70,age band females 75,age band females 80,age band females 85,age band females 90,age band females 95
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Welwyn Hatfield 010F,0.666667,SG14AB,19.2,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.104,17062,...,53.0,47.0,55.0,32.0,35.0,18.0,5.0,5.0,4.0,0.0
Welwyn Hatfield 012A,4.0,SG14AB,20.3,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.176,9058,...,64.0,42.0,36.0,15.0,30.0,27.0,15.0,17.0,12.0,0.0


In [29]:
ages = pd.read_csv('./data/demographic/mid_2018_males.csv', index_col='LSOA')

# Total male population per LSOA (aligned to `data` on the index)
data['All males'] = ages['All Ages']

# Remove the total, and relabel the open-ended '90+' column as '91' so that
# every remaining column name can be parsed as an integer age.
ages = ages.drop(columns='All Ages').rename(columns={'90+': '91'})

# Sum the age columns into 5-year bands labelled by their lower bound
# (0, 5, ..., 95).
# NOTE(review): with '90+' mapped to 91 it looks like no column can fall into
# band 95, so 'age band males 95' would always be zero - confirm intended.
age_bands = pd.DataFrame()
for band in np.arange(0, 96, 5):
    cols_to_get = [
        field for field in ages.columns
        if int(int(field) / 5) * 5 == band]
    age_bands[f'age band males {band}'] = ages[cols_to_get].sum(axis=1)

# Left-join on the LSOA name index, then re-apply the saved LSOA index
data = (
    data
    .merge(age_bands, left_index=True, right_index=True, how='left')
    .set_index(lsoa_index)
)
data.head(2)

Unnamed: 0_level_0,admissions,closest_ivt_unit,closest_ivt_unit_time,closest_mt_unit,closest_mt_unit_time,mt_transfer_time,ivt_rate,Local Authority District name (2019),Income Domain Score,Income Domain Rank (where 1 is most deprived),...,age band males 50,age band males 55,age band males 60,age band males 65,age band males 70,age band males 75,age band males 80,age band males 85,age band males 90,age band males 95
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Welwyn Hatfield 010F,0.666667,SG14AB,19.2,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.104,17062,...,53.0,62.0,37.0,32.0,27.0,30.0,15.0,4.0,3.0,0.0
Welwyn Hatfield 012A,4.0,SG14AB,20.3,NW12BU,37.5,47.2,6.8,Welwyn Hatfield,0.176,9058,...,58.0,63.0,43.0,36.0,25.0,14.0,15.0,9.0,7.0,0.0


## Save output

In [30]:
# Write the collated table to CSV; the index is written under the column
# label 'LSOA' (index_label), not its in-memory name.
data.to_csv('./data/collated_data.csv', index_label='LSOA')