# Match hospitals to LSOAs

Find which LSOA contains each hospital.

## Setup

In [1]:
import pandas as pd
import numpy as np

# For importing geojson:
import json

# For creating new points:
# from geojson import Feature, Point #FeatureCollection
from shapely.geometry import shape, Point

## Import data

### Hospital locations

In [2]:
df_hospitals = pd.read_csv('../data/stroke_hospitals_2022.csv')

df_hospitals.head()

Unnamed: 0,Postcode,Hospital_name,Use_IVT,Use_MT,Use_MSU,Country,Strategic Clinical Network,Health Board / Trust,Stroke Team,SSNAP name,...,Thrombolysis,ivt_rate,Easting,Northing,long,lat,Neuroscience,30 England Thrombectomy Example,hospital_city,Notes
0,RM70AG,RM70AG,1,1,1,England,London SCN,Barking,Havering and Redbridge University Hospitals N...,Queens Hospital Romford HASU,...,117,11.9,551118,187780,0.179031,51.568647,1,0,Romford,
1,E11BB,E11BB,1,1,1,England,London SCN,Barts Health NHS Trust,The Royal London Hospital,Royal London Hospital HASU,...,115,13.4,534829,181798,-0.058133,51.519018,1,1,Royal London,
2,SW66SX,SW66SX,1,1,1,England,London SCN,Imperial College Healthcare NHS Trust,"Charing Cross Hospital, London",Charing Cross Hospital HASU,...,113,9.9,524226,176487,-0.212736,51.473717,1,1,Charing Cross,
3,SE59RW,SE59RW,1,1,1,England,London SCN,King's College Hospital NHS Foundation Trust,"King's College Hospital, London",King's College Hospital HASU,...,124,15.0,532536,176228,-0.093251,51.469505,1,0,Kings College,
4,BR68ND,BR68ND,1,0,0,England,London SCN,King's College Hospital NHS Foundation Trust,Princess Royal University Hospital,Princess Royal University Hospital HASU,...,113,13.3,543443,165032,0.059146,51.366243,0,0,Princess Royal,


In [3]:
hospital_points = []

for r in range(df_hospitals.shape[0]):
    # Get longitude and latitude from the dataframe
    # and convert it into a "Point" that the shape package can understand
    # and later compare with polygons of LSOA coordinates.
    long = df_hospitals['long'].iloc[r]
    lat = df_hospitals['lat'].iloc[r]
    point = Point(long, lat)

    hospital_points.append(point)

In [4]:
hospital_points

[<POINT (0.179 51.569)>,
 <POINT (-0.058 51.519)>,
 <POINT (-0.213 51.474)>,
 <POINT (-0.093 51.47)>,
 <POINT (0.059 51.366)>,
 <POINT (-0.322 51.575)>,
 <POINT (-0.176 51.427)>,
 <POINT (-0.137 51.525)>,
 <POINT (-1.515 52.911)>,
 <POINT (-0.884 52.236)>,
 <POINT (-1.186 52.943)>,
 <POINT (-1.16 52.991)>,
 <POINT (-1.232 53.134)>,
 <POINT (-0.52 53.234)>,
 <POINT (-0.01 52.991)>,
 <POINT (-1.135 52.628)>,
 <POINT (0.451 51.558)>,
 <POINT (-0.472 52.128)>,
 <POINT (0.139 52.174)>,
 <POINT (0.899 51.91)>,
 <POINT (-0.213 51.925)>,
 <POINT (1.198 52.057)>,
 <POINT (1.718 52.562)>,
 <POINT (-0.476 51.895)>,
 <POINT (0.466 51.775)>,
 <POINT (1.221 52.618)>,
 <POINT (-0.279 52.584)>,
 <POINT (0.447 52.757)>,
 <POINT (0.689 51.554)>,
 <POINT (-0.404 51.649)>,
 <POINT (0.709 52.232)>,
 <POINT (-1.656 52.818)>,
 <POINT (-2.119 52.503)>,
 <POINT (-1.478 52.513)>,
 <POINT (-1.83 52.48)>,
 <POINT (-2.096 52.6)>,
 <POINT (-1.988 52.527)>,
 <POINT (-2.511 52.712)>,
 <POINT (-1.583 52.29)>,
 <POINT 

### Geojson containing all LSOAs:

In [5]:
with open('./LSOA_2011.geojson') as f:
    geojson_ew = json.load(f)

In [6]:
# Fill these lists with the LSOAs containing the hospitals:
hospital_LSOA11CD = []
hospital_LSOA11NM = []
hospital_LSOA11NMW = []

for point in hospital_points:
    # Check each LSOA in turn to see if it contains this point.
    i = 0
    while i < len(geojson_ew['features']):
        # Get the coordinates and other info for this LSOA:
        feature = geojson_ew['features'][i]
        # Turn this into a shape:
        polygon = shape(feature['geometry'])
        if polygon.contains(point):
            # Save LSOA details to list:
            hospital_LSOA11CD.append(feature['properties']['LSOA11CD'])
            hospital_LSOA11NM.append(feature['properties']['LSOA11NM'])
            hospital_LSOA11NMW.append(feature['properties']['LSOA11NMW'])
            # Update the iterator so that the loop breaks:
            i = len(geojson_ew['features'])
        # Update the iterator for another go round the loop:
        i += 1

## Link the LSOAs with other regions

Import the big file we made earlier:

In [7]:
df_regions = pd.read_csv('./LSOA_regions.csv')

df_regions.head()

Unnamed: 0,LSOA11CD,LSOA11NM,CCG19CD,CCG19NM,STP19CD,STP19NM,LHB20CD,LHB20NM,LHB20NMW,LAD17CD,LAD17NM,SCN17CD,SCN17NM,RGN11CD,RGN11NM
0,E01031349,Adur 001A,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East
1,E01031350,Adur 001B,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East
2,E01031351,Adur 001C,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East
3,E01031352,Adur 001D,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East
4,E01031370,Adur 001E,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East


In [8]:
all_regions = []

for LSOA in hospital_LSOA11CD:
    # Get region information for this hospital:
    regions_here = df_regions[df_regions['LSOA11CD'] == LSOA].values
    # Keep a copy of everything except the first two columns
    # (LSOA11 codes and names), which we already have ready:
    all_regions.append(regions_here[0][2:])

all_regions = np.array(all_regions, dtype=object)
all_regions

array([['E38000077', 'NHS Havering CCG', 'E54000029', ..., 'London',
        'E12000007', 'London'],
       ['E38000186', 'NHS Tower Hamlets CCG', 'E54000029', ..., 'London',
        'E12000007', 'London'],
       ['E38000070', 'NHS Hammersmith and Fulham CCG', 'E54000027', ...,
        'London', 'E12000007', 'London'],
       ...,
       [nan, nan, nan, ..., nan, 'W92000004', 'Wales'],
       [nan, nan, nan, ..., nan, 'W92000004', 'Wales'],
       [nan, nan, nan, ..., nan, 'W92000004', 'Wales']], dtype=object)

## Create a dataframe to save

In [9]:
# Gather the hospital names, locations, and containing LSOAs...
table = np.stack([
    df_hospitals['Postcode'],
    df_hospitals['Stroke Team'],
    df_hospitals['long'],
    df_hospitals['lat'],
    hospital_LSOA11CD,
    hospital_LSOA11NM,
    hospital_LSOA11NMW,
    ], axis=-1)

# ... and then add the other region information from the file.
table = np.concatenate([table, all_regions], axis=1)

In [10]:
df_hospitals_and_lsoas = pd.DataFrame(
    table,
    columns=[
        'Postcode',
        'Stroke Team',
        'long',
        'lat',
        'LSOA11CD',
        'LSOA11NM',
        'LSOA11NMW',
        *df_regions.columns[2:]
    ]
)

df_hospitals_and_lsoas.head()

Unnamed: 0,Postcode,Stroke Team,long,lat,LSOA11CD,LSOA11NM,LSOA11NMW,CCG19CD,CCG19NM,STP19CD,STP19NM,LHB20CD,LHB20NM,LHB20NMW,LAD17CD,LAD17NM,SCN17CD,SCN17NM,RGN11CD,RGN11NM
0,RM70AG,Havering and Redbridge University Hospitals N...,0.179031,51.568647,E01002248,Havering 017C,Havering 017C,E38000077,NHS Havering CCG,E54000029,East London Health and Care Partnership,,,,E09000016,Havering,E55000014,London,E12000007,London
1,E11BB,The Royal London Hospital,-0.058133,51.519018,E01004322,Tower Hamlets 017A,Tower Hamlets 017A,E38000186,NHS Tower Hamlets CCG,E54000029,East London Health and Care Partnership,,,,E09000030,Tower Hamlets,E55000014,London,E12000007,London
2,SW66SX,"Charing Cross Hospital, London",-0.212736,51.473717,E01001906,Hammersmith and Fulham 022C,Hammersmith and Fulham 022C,E38000070,NHS Hammersmith and Fulham CCG,E54000027,North West London Health and Care Partnership,,,,E09000013,Hammersmith and Fulham,E55000014,London,E12000007,London
3,SE59RW,"King's College Hospital, London",-0.093251,51.469505,E01003076,Lambeth 014C,Lambeth 014C,E38000092,NHS Lambeth CCG,E54000030,Our Healthier South East London,,,,E09000022,Lambeth,E55000014,London,E12000007,London
4,BR68ND,Princess Royal University Hospital,0.059146,51.366243,E01000751,Bromley 036C,Bromley 036C,E38000023,NHS Bromley CCG,E54000030,Our Healthier South East London,,,,E09000006,Bromley,E55000014,London,E12000007,London


Save this dataframe to file:

In [11]:
df_hospitals_and_lsoas.to_csv('hospitals_and_lsoas.csv', index=False)