# Clean stroke hospitals

## Summary

This notebook cleans `stroke_hospitals_2022.csv` by adding a `data_team` column to the hospitals data, so that each hospital can be matched to the stroke team names used in our SSNAP dataset.

## Set up

In [1]:
# Import packages
from dataclasses import dataclass
import numpy as np
import os
import pandas as pd

# Linting
%load_ext pycodestyle_magic
%pycodestyle_on

In [2]:
# Set paths and filenames
@dataclass(frozen=True)
class Paths:
    '''Immutable collection of the folders and filenames used below.

    Every field has a default, so `Paths()` takes no arguments. The
    dataclass is frozen, so fields cannot be reassigned after creation.
    '''

    # Folder and filename of the raw hospitals data
    raw_path: str = './data'
    data_filename: str = 'stroke_hospitals_2022.csv'

    # Folder for cleaned data: the SSNAP extract is read from here and
    # the cleaned hospitals file is written here
    clean_path: str = './output'
    save_filename: str = 'clean_stroke_hospitals_2022.csv'
    ssnap_filename: str = 'clean_samuel_ssnap_extract_v2.csv'

    # Database filename (not referenced by the cells shown here)
    database_filename: str = 'samuel.db'


# Single shared instance used by the rest of the notebook
paths = Paths()

In [3]:
# Load data
# Build both file locations first, then read them
hospitals_file = os.path.join(paths.raw_path, paths.data_filename)
ssnap_file = os.path.join(paths.clean_path, paths.ssnap_filename)

# Raw hospitals file, read in one pass (low_memory=False)
hospitals: pd.DataFrame = pd.read_csv(hospitals_file, low_memory=False)
# Cleaned SSNAP extract, read from the output folder
ssnap: pd.DataFrame = pd.read_csv(ssnap_file)

## Preview hospitals dataframe

In [4]:
# Show the column names, then the first five rows
display(hospitals.columns, hospitals.head())

Index(['Postcode', 'Hospital_name', 'Use_IVT', 'Use_MT', 'Use_MSU', 'Country',
       'Strategic Clinical Network', 'Health Board / Trust', 'Stroke Team',
       'SSNAP name', 'Admissions 21/22', 'Thrombolysis', 'ivt_rate', 'Easting',
       'Northing', 'long', 'lat', 'Neuroscience',
       '30 England Thrombectomy Example', 'hospital_city', 'Notes'],
      dtype='object')

Unnamed: 0,Postcode,Hospital_name,Use_IVT,Use_MT,Use_MSU,Country,Strategic Clinical Network,Health Board / Trust,Stroke Team,SSNAP name,...,Thrombolysis,ivt_rate,Easting,Northing,long,lat,Neuroscience,30 England Thrombectomy Example,hospital_city,Notes
0,RM70AG,RM70AG,1,1,1,England,London SCN,Barking,Havering and Redbridge University Hospitals N...,Queens Hospital Romford HASU,...,117,11.9,551118,187780,0.179031,51.568647,1,0,Romford,
1,E11BB,E11BB,1,1,1,England,London SCN,Barts Health NHS Trust,The Royal London Hospital,Royal London Hospital HASU,...,115,13.4,534829,181798,-0.058133,51.519018,1,1,Royal London,
2,SW66SX,SW66SX,1,1,1,England,London SCN,Imperial College Healthcare NHS Trust,"Charing Cross Hospital, London",Charing Cross Hospital HASU,...,113,9.9,524226,176487,-0.212736,51.473717,1,1,Charing Cross,
3,SE59RW,SE59RW,1,1,1,England,London SCN,King's College Hospital NHS Foundation Trust,"King's College Hospital, London",King's College Hospital HASU,...,124,15.0,532536,176228,-0.093251,51.469505,1,0,Kings College,
4,BR68ND,BR68ND,1,0,0,England,London SCN,King's College Hospital NHS Foundation Trust,Princess Royal University Hospital,Princess Royal University Hospital HASU,...,113,13.3,543443,165032,0.059146,51.366243,0,0,Princess Royal,


## Add teams column to match data

In [5]:
# Merge hospitals with the stroke teams in the SSNAP data
# One row per distinct team name found in the SSNAP extract
ssnap_team_names = ssnap['stroke_team'].unique()
data_names = pd.DataFrame({'data_team': ssnap_team_names})

# Left join keeps every SSNAP team, even when no hospital row has a
# matching 'SSNAP name'
raw_teams = pd.merge(
    data_names, hospitals,
    how='left', left_on='data_team', right_on='SSNAP name')
raw_teams.shape

(123, 22)

In [6]:
# Identify duplicates and remove incorrect matches
display(raw_teams[raw_teams.duplicated(subset=['data_team'], keep=False)])

# Hospitals that the merge paired with the wrong SSNAP team (see the
# duplicated rows displayed by this cell)
incorrect_matches = [
    'Princess Royal Hospital, Haywards Heath',
    'Pinderfields Hospital, Wakefield']

# Take an explicit copy so that later cells can assign into `teams` with
# .loc without raising SettingWithCopyWarning (a boolean-filtered frame
# is otherwise flagged as a possible view of raw_teams)
teams = raw_teams[~raw_teams['Stroke Team'].isin(incorrect_matches)].copy()

# Each SSNAP team should now appear exactly once
assert teams['data_team'].is_unique, 'Duplicate teams remain after cleaning'
teams.shape

Unnamed: 0,data_team,Postcode,Hospital_name,Use_IVT,Use_MT,Use_MSU,Country,Strategic Clinical Network,Health Board / Trust,Stroke Team,...,Thrombolysis,ivt_rate,Easting,Northing,long,lat,Neuroscience,30 England Thrombectomy Example,hospital_city,Notes
14,Peterborough City Hospital,PE39GZ,PE39GZ,1.0,0.0,0.0,England,East of England SCN,Peterborough and Stamford Hospitals NHS Founda...,Peterborough City Hospital,...,94.0,13.0,516674.0,299847.0,-0.279386,52.583921,0.0,0.0,Peterborough,
15,Peterborough City Hospital,WF14DG,WF14DG,1.0,0.0,0.0,England,Yorkshire and The Humber SCN,Mid Yorkshire Hospitals NHS Trust,"Pinderfields Hospital, Wakefield",...,94.0,13.0,433870.0,421896.0,-1.488555,53.692418,0.0,0.0,Wakefield,
83,Prince Philip Hospital,RH164EX,RH164EX,0.0,0.0,0.0,England,South East SCN,Brighton and Sussex University Hospitals NHS T...,"Princess Royal Hospital, Haywards Heath",...,15.0,9.3,534125.0,123035.0,-0.090282,50.991095,0.0,0.0,Haywards Heath,
84,Prince Philip Hospital,SA148QF,SA14 8QF,1.0,0.0,0.0,Wales,Wales,Wales,Prince Philip Hospital (Llanelli),...,15.0,9.3,252459.0,201367.0,-4.135974,51.691612,0.0,0.0,Llanelli,


(121, 22)

In [7]:
# Identify hospitals not merged
display(teams.loc[
    teams['SSNAP name'].isnull(), 'data_team'])

# Teams that did not merge on 'SSNAP name' and are matched by hand.
# Each pair is (SSNAP 'data_team', 'Stroke Team' in the hospitals data).
manual_matches = [
    ('Medway Maritime Hospital', 'Medway Maritime Hospital, Gillingham'),
    ('Princess Royal Hospital Telford', 'Princess Royal Hospital, Telford'),
    ('University Hospitals Dorset Stroke Service',
     'University Hospitals Dorset HASU'),
    ('Princess Royal Hospital Haywards Heath',
     'Princess Royal Hospital, Haywards Heath')]

# Add those teams manually: copy the hospital's row into every column
# except 'data_team', which keeps the SSNAP team name
hospital_columns = teams.columns != 'data_team'
for data_team, stroke_team in manual_matches:
    match = hospitals[hospitals['Stroke Team'] == stroke_team]
    # Fail with a clear message rather than a bare IndexError below
    assert not match.empty, f'No hospital with Stroke Team {stroke_team!r}'
    # If several hospital rows match, the first one is used
    teams.loc[teams['data_team'] == data_team,
              hospital_columns] = match.to_numpy()[0]

# NOTE(review): in the displayed matches the Dorset row carries a Dudley
# postcode and trust, and the 'SSNAP name' of every matched row differs
# from its data_team - confirm these rows in the raw file are correct.

# View the matches
display(teams.loc[
    teams['data_team'].isin([name for name, _ in manual_matches]),
    ['data_team', 'Postcode', 'Country', 'Strategic Clinical Network',
     'Health Board / Trust', 'Stroke Team', 'SSNAP name']])

30                       Medway Maritime Hospital
93                Princess Royal Hospital Telford
106        Princess Royal Hospital Haywards Heath
113                   Princess Alexandra Hospital
121    University Hospitals Dorset Stroke Service
Name: data_team, dtype: object

Unnamed: 0,data_team,Postcode,Country,Strategic Clinical Network,Health Board / Trust,Stroke Team,SSNAP name
30,Medway Maritime Hospital,ME75NY,England,South East SCN,Medway NHS Foundation Trust,"Medway Maritime Hospital, Gillingham",None (for 72h care)
93,Princess Royal Hospital Telford,TF16TF,England,West Midlands SCN,Shrewsbury and Telford Hospital NHS Trust,"Princess Royal Hospital, Telford",Princess Of Wales Hospital
106,Princess Royal Hospital Haywards Heath,RH164EX,England,South East SCN,Brighton and Sussex University Hospitals NHS T...,"Princess Royal Hospital, Haywards Heath",Prince Philip Hospital
121,University Hospitals Dorset Stroke Service,DY12HQ,England,West Midlands SCN,Dudley Group of Hospitals NHS Foundation Trust,University Hospitals Dorset HASU,Russells Hall Hospital


One team is in the SSNAP data but not in stroke_hospitals_2022: Princess Alexandra Hospital. This is likely due to closure, as it has only 44 attendances in 2016 and 3 in 2017 (see the counts below). Its hospital columns are therefore left empty in the saved file.

In [8]:
# Count the SSNAP records per year for the one unmatched team
(ssnap
 .loc[ssnap['stroke_team'] == 'Princess Alexandra Hospital', 'year']
 .value_counts())

2016    44
2017     3
Name: year, dtype: int64

## Save data

In [9]:
# Write the matched teams to the output folder, without the row index
save_file = os.path.join(paths.clean_path, paths.save_filename)
teams.to_csv(save_file, index=False)