# Match hospitals to LSOAs

Find which Lower Layer Super Output Area (LSOA) contains each hospital.

The LSOA boundaries are read from a geojson file. Each hospital's longitude and latitude are converted into a point, and each point is then tested against the LSOA boundary shapes to find the one shape that contains it.

## Setup

In [1]:
import pandas as pd
import numpy as np

# For importing geojson:
import json

# For creating new points:
# from geojson import Feature, Point #FeatureCollection
from shapely.geometry import shape, Point

## Import data

### Hospital locations

In [3]:
# Read the table of stroke teams. Each row holds a team name, its postcode
# and the matching coordinates (including the 'lat' and 'long' columns).
hospitals_csv_path = '../data_tabular/stroke_teams_postcodes.csv'
df_hospitals = pd.read_csv(hospitals_csv_path)

df_hospitals.head()

Unnamed: 0,stroke_team,Postcode,Grid Reference,Easting,Northing,lat,long
0,Queen Elizabeth Hospital Edgbaston,B152TH,SP 04427 83926,404427,283926,52.453272,-1.936284
1,Sandwell District Hospital,B714HJ,SP 00919 92165,400919,292165,52.527357,-1.987884
2,Royal United Hospital Bath,BA13NG,ST 72922 66042,372922,166042,51.392764,-2.390566
3,Yeovil District Hospital,BA214AT,ST 55562 16398,355562,116398,50.945297,-2.633907
4,Royal Blackburn Hospital,BB23HH,SD 69305 26794,369305,426794,53.736622,-2.466806


In [4]:
# Convert each hospital's coordinates from the dataframe into a "Point"
# that the shape package can understand and later compare with the
# polygons of LSOA coordinates.
# Points are built as (x, y), i.e. (longitude, latitude).
hospital_points = [
    Point(long, lat)
    for long, lat in zip(df_hospitals['long'], df_hospitals['lat'])
]

In [5]:
# Display the points as a check. Each one is shown as (longitude latitude),
# matching the argument order used when the points were created.
hospital_points

[<POINT (-1.936 52.453)>,
 <POINT (-1.988 52.527)>,
 <POINT (-2.391 51.393)>,
 <POINT (-2.634 50.945)>,
 <POINT (-2.467 53.737)>,
 <POINT (-1.795 53.806)>,
 <POINT (-2.255 53.601)>,
 <POINT (-0.364 50.817)>,
 <POINT (0.271 50.787)>,
 <POINT (-0.118 50.819)>,
 <POINT (0.06 51.366)>,
 <POINT (-2.593 51.497)>,
 <POINT (-2.596 51.459)>,
 <POINT (-2.971 51.322)>,
 <POINT (-2.955 54.896)>,
 <POINT (0.139 52.174)>,
 <POINT (-3.189 51.507)>,
 <POINT (-3.572 51.518)>,
 <POINT (-3.386 51.764)>,
 <POINT (-2.899 53.212)>,
 <POINT (-3.095 53.371)>,
 <POINT (0.465 51.775)>,
 <POINT (0.086 51.771)>,
 <POINT (0.899 51.91)>,
 <POINT (1.087 51.267)>,
 <POINT (1.389 51.378)>,
 <POINT (-1.444 52.421)>,
 <POINT (-1.583 52.29)>,
 <POINT (-2.476 53.118)>,
 <POINT (0.258 51.435)>,
 <POINT (-1.513 52.912)>,
 <POINT (-1.593 54.789)>,
 <POINT (-0.667 53.587)>,
 <POINT (-1.109 53.531)>,
 <POINT (-2.447 50.713)>,
 <POINT (-2.118 52.502)>,
 <POINT (-1.821 50.747)>,
 <POINT (-0.058 51.519)>,
 <POINT (-3.507 50.717)>

### Geojson containing all LSOAs:

In [7]:
# Load the boundaries of all LSOAs.
# GeoJSON is UTF-8 by specification, so state the encoding explicitly
# instead of relying on the platform default, which is not UTF-8 on every
# system and could fail on (or garble) non-ASCII characters in the names.
with open('../data_geojson/ons_data/LSOA_(Dec_2011)_Boundaries_Super_Generalised_Clipped_(BSC)_EW_V3.geojson', encoding='utf-8') as f:
    geojson_ew = json.load(f)

In [8]:
# Turn every LSOA's geometry into a shape once, up front. The shapes do not
# depend on the hospital, so building them inside the hospital loop would
# repeat the same conversion for every hospital.
lsoa_features = geojson_ew['features']
lsoa_polygons = [shape(feature['geometry']) for feature in lsoa_features]

# Placeholder details used when no LSOA contains a hospital. This keeps
# the lists below at exactly one entry per hospital, in the same order as
# hospital_points, so they stay aligned with the hospital dataframe.
no_lsoa_found = {'LSOA11CD': None, 'LSOA11NM': None, 'LSOA11NMW': None}

# Fill these lists with the LSOAs containing the hospitals:
hospital_LSOA11CD = []
hospital_LSOA11NM = []
hospital_LSOA11NMW = []
# Positions in hospital_points of any hospitals that no LSOA contains:
hospitals_without_LSOA = []

for h, point in enumerate(hospital_points):
    # Check each LSOA in turn and stop at the first one that
    # contains this point.
    for feature, polygon in zip(lsoa_features, lsoa_polygons):
        if polygon.contains(point):
            properties = feature['properties']
            break
    else:
        # The loop ended without a break, so no LSOA contains this point.
        properties = no_lsoa_found
        hospitals_without_LSOA.append(h)

    # Save LSOA details to list:
    hospital_LSOA11CD.append(properties['LSOA11CD'])
    hospital_LSOA11NM.append(properties['LSOA11NM'])
    hospital_LSOA11NMW.append(properties['LSOA11NMW'])

# Report unmatched hospitals here, where the cause is known, rather than
# leaving them to surface as an unrelated error further down.
if hospitals_without_LSOA:
    print('No containing LSOA found for hospitals at positions:',
          hospitals_without_LSOA)

## Link the LSOAs with other regions

Import the LSOA-to-region lookup table (`LSOA_regions.csv`) that we made earlier:

In [9]:
df_regions = pd.read_csv('../data_tabular/LSOA_regions.csv')

df_regions.head()

Unnamed: 0,LSOA11CD,LSOA11NM,LSOA11LONG,LSOA11LAT,CCG19CD,CCG19NM,STP19CD,STP19NM,LHB20CD,LHB20NM,LHB20NMW,LAD17CD,LAD17NM,SCN17CD,SCN17NM,RGN11CD,RGN11NM
0,E01031349,Adur 001A,-0.22737,50.83651,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East
1,E01031350,Adur 001B,-0.22842,50.84244,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East
2,E01031351,Adur 001C,-0.253,50.85845,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East
3,E01031352,Adur 001D,-0.23812,50.8429,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East
4,E01031370,Adur 001E,-0.24649,50.83958,E38000213,NHS Coastal West Sussex CCG,E54000033,Sussex and East Surrey,,,,E07000223,Adur,E55000010,South East Coast,E12000008,South East


In [10]:
# Number of region columns kept for each hospital: everything except the
# first two columns (LSOA11 codes and names), which we already have ready.
n_region_columns = len(df_regions.columns[2:])

all_regions = []
# LSOA codes for which df_regions has no row:
LSOAs_without_regions = []

for LSOA in hospital_LSOA11CD:
    # Get region information for this hospital:
    regions_here = df_regions[df_regions['LSOA11CD'] == LSOA].values
    if len(regions_here) > 0:
        # Keep the first matching row, minus the first two columns.
        all_regions.append(regions_here[0][2:])
    else:
        # No row for this LSOA. Indexing the empty result would raise an
        # IndexError, so store a row of missing values instead. This keeps
        # one row per hospital and the same number of columns throughout.
        all_regions.append(np.full(n_region_columns, np.nan, dtype=object))
        LSOAs_without_regions.append(LSOA)

if LSOAs_without_regions:
    print('No region information found for LSOAs:', LSOAs_without_regions)

all_regions = np.array(all_regions, dtype=object)
all_regions

array([[-1.94449, 52.45242, 'E38000220', ..., 'West Midlands',
        'E12000005', 'West Midlands'],
       [-1.98593, 52.5221, 'E38000144', ..., 'West Midlands',
        'E12000005', 'West Midlands'],
       [-2.38922, 51.39029, 'E38000009', ..., 'South West', 'E12000009',
        'South West'],
       ...,
       [-2.19107, 52.18789, 'E38000166', ..., 'West Midlands',
        'E12000005', 'West Midlands'],
       [-2.09493, 52.59789, 'E38000210', ..., 'West Midlands',
        'E12000005', 'West Midlands'],
       [-1.08157, 53.97053, 'E38000188', ..., 'Yorkshire and The Humber',
        'E12000003', 'Yorkshire and The Humber']], dtype=object)

## Create a dataframe to save

In [11]:
# Gather the hospital names, locations, and containing LSOAs...
table = np.stack([
    df_hospitals['Postcode'],
    df_hospitals['stroke_team'],#['Stroke Team'],
    df_hospitals['long'],
    df_hospitals['lat'],
    hospital_LSOA11CD,
    hospital_LSOA11NM,
    hospital_LSOA11NMW,
    ], axis=-1)

# ... and then add the other region information from the file.
table = np.concatenate([table, all_regions], axis=1)

In [12]:
# Column names, in the same order as the columns of `table`:
# the hospital and LSOA details first...
output_columns = [
    'Postcode',
    'Stroke Team',
    'long',
    'lat',
    'LSOA11CD',
    'LSOA11NM',
    'LSOA11NMW',
]
# ... followed by the region columns, skipping the first two columns of
# the regions file to match the values kept in the table.
output_columns.extend(df_regions.columns[2:])

df_hospitals_and_lsoas = pd.DataFrame(table, columns=output_columns)

df_hospitals_and_lsoas.head()

Unnamed: 0,Postcode,Stroke Team,long,lat,LSOA11CD,LSOA11NM,LSOA11NMW,LSOA11LONG,LSOA11LAT,CCG19CD,...,STP19NM,LHB20CD,LHB20NM,LHB20NMW,LAD17CD,LAD17NM,SCN17CD,SCN17NM,RGN11CD,RGN11NM
0,B152TH,Queen Elizabeth Hospital Edgbaston,-1.936284,52.453272,E01033562,Birmingham 087F,Birmingham 087F,-1.94449,52.45242,E38000220,...,Birmingham and Solihull,,,,E08000025,Birmingham,E55000008,West Midlands,E12000005,West Midlands
1,B714HJ,Sandwell District Hospital,-1.987884,52.527357,E01010104,Sandwell 017E,Sandwell 017E,-1.98593,52.5221,E38000144,...,The Black Country and West Birmingham,,,,E08000028,Sandwell,E55000008,West Midlands,E12000005,West Midlands
2,BA13NG,Royal United Hospital Bath,-2.390566,51.392764,E01014428,Bath and North East Somerset 008B,Bath and North East Somerset 008B,-2.38922,51.39029,E38000009,...,"Bath and North East Somerset, Swindon and Wilt...",,,,E06000022,Bath and North East Somerset,E55000009,South West,E12000009,South West
3,BA214AT,Yeovil District Hospital,-2.633907,50.945297,E01029231,South Somerset 014A,South Somerset 014A,-2.6334,50.95035,E38000150,...,Somerset,,,,E07000189,South Somerset,E55000009,South West,E12000009,South West
4,BB23HH,Royal Blackburn Hospital,-2.466806,53.736622,E01012632,Blackburn with Darwen 011F,Blackburn with Darwen 011F,-2.45667,53.73091,E38000014,...,Healthier Lancashire and South Cumbria,,,,E06000008,Blackburn with Darwen,E55000003,"Greater Manchester, Lancashire and South Cumbria",E12000002,North West


Save this dataframe to file:

In [13]:
# Write the combined table to file without the dataframe's row index.
output_csv_path = '../data_tabular/hospitals_and_lsoas_descriptive_stats.csv'
df_hospitals_and_lsoas.to_csv(output_csv_path, index=False)