#### Set styling for plotting

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import seaborn as sns
sns.set_palette('colorblind')
from matplotlib.pyplot import tight_layout
# ##SETTING PARAMS FOR MATPLOTLIB FIGURES
plt.rcParams.update({"figure.figsize": (6, 6),
                 "axes.facecolor": "white",
                 "axes.edgecolor": "black"})
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=sns.color_palette('colorblind'))
##set font size
font = {'family': 'sans-serif',
       'weight': 'normal',
       'size': 14}
plt.rc('font', **font)
# ##PANDAS PLOTTING
pd.plotting.register_matplotlib_converters()

#### Step 1: save environment file

In [2]:
!conda env export > nas_environment.yml

#### Step 2: import modules

In [3]:
import glob
import os
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from tqdm import tqdm
import codecs
import csv
import numpy as np
!pip install openpyxl
%matplotlib inline

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [4]:
data_directory_xl = "/home/paulharford/college/project/project_data/nas/WEATHERED Report CAD 20241217.xlsx"
full_path_xl = os.path.abspath(data_directory_xl)

In [6]:
##Import data from excel into panda's dataframe
nas_df = pd.read_excel(full_path_xl)

In [8]:
nas_df.head(10)

Unnamed: 0,Randomised ID,DateCallForPerf,Time of Day,PickupTown,EIRCODE ID,DespatchCode,DespCodeDescription,Hospital Attended
0,54959224086,2016-01-01,Night,DUNGLOE,,17B01P,Falls - Possibly Dangerous body Area - Public ...,LETTERKENNY GENERAL HOSPITAL
1,54959224146,2016-01-01,Night,BEAUMONT DUBLIN 9,,17B01G,Falls - Possibly Dangerous body Area - On the ...,BEAUMONT HOSPITAL
2,54959224172,2016-01-01,Night,DROGHEDA,,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA
3,54959224182,2016-01-01,Night,BORRISOKANE,,17B01P,Falls - Possibly Dangerous body Area - Public ...,PORTIUNCULA GENERAL HOSPITAL
4,54959224201,2016-01-01,Night,WEXFORD,,17B01G,Falls - Possibly Dangerous body Area - On the ...,
5,54959224289,2016-01-01,Night,KILLARNEY,,17D04G,Falls - Not Alert - On the ground or floor,KERRY UNIVERSITY HOSPITAL
6,54959224429,2016-01-01,Night,RATHNEENY WEST LAGHEY,,17B01,Falls - Possibly Dangerous body Area -,SLIGO GENERAL HOSPITAL
7,54959224658,2016-01-01,Morning,GLENAGEARY,,17B00G,Falls - Bravo Override - On the ground or floor,ST VINCENTS HOSPITAL
8,54959224681,2016-01-01,Morning,DUNDALK,,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA
9,54959224682,2016-01-01,Morning,CAUSEWAY,,17B01G,Falls - Possibly Dangerous body Area - On the ...,KERRY UNIVERSITY HOSPITAL


In [10]:
unique_hospitals = nas_df["Hospital Attended"].unique()
print(unique_hospitals)


['LETTERKENNY GENERAL HOSPITAL' 'BEAUMONT HOSPITAL'
 'LOURDES HOSPITAL DROGHEDA' 'PORTIUNCULA GENERAL HOSPITAL' nan
 'KERRY UNIVERSITY HOSPITAL' 'SLIGO GENERAL HOSPITAL'
 'ST VINCENTS HOSPITAL' 'UNIVERSITY HOSPITAL LIMERICK'
 'SOUTH TIPPERARY GENERAL' 'MULLINGAR REGIONAL HOSPITAL'
 'NAAS GENERAL HOSPITAL' 'CORK UNIVERSITY HOSPITAL'
 'PORTLAOISE GENERAL HOSPITAL' 'CONNOLLY MEMORIAL HOSPITAL'
 'CAVAN GENERAL HOSPITAL' 'MAYO GENERAL HOSPITAL'
 'UNIVERSITY HOSPITAL GALWAY' 'WATERFORD UNIVERSITY HOSPITAL'
 'TULLAMORE GENERAL HOSPITAL' 'WEXFORD GENERAL HOSPITAL'
 'ST JAMES HOSPITAL' 'MERCY UNIVERSITY HOSPITAL'
 'ST MICHAELS DUN LAOGHAIRE' 'OUR LADYS NAVAN' 'TALLAGHT HOSPITAL'
 'MATER HOSPITAL DUBLIN' 'BANTRY GENERAL HOSPITAL'
 'ST LUKES HOSPITAL KILKENNY' 'SACRED HEART HOSPITAL MAYO'
 'ST COLUMCILLES HOSPITAL LOUGHLINSTOWN' 'MALLOW GENERAL HOSPITAL'
 'DAISY HILL HOSPTIAL NEWRY' 'ALTNAGELVIN HOSPITAL'
 'CORK UNIVERSITY MATERNITY HOSP' 'BON SECOURS HOSPITAL CORK'
 'ENNIS GENERAL HOSPITAL' 'ROS

In [20]:
##add regional information
hse_mapping = {
    "HSE Dublin and North East": [
        "BEAUMONT HOSPITAL", "LOURDES HOSPITAL DROGHEDA", "CAVAN GENERAL HOSPITAL",
        "OUR LADYS NAVAN", "MATER HOSPITAL DUBLIN", "CONNOLLY MEMORIAL HOSPITAL",
        "LOUTH COUNTY HOSPITAL"
    ],
    "HSE Dublin and Midlands": [
        "MULLINGAR REGIONAL HOSPITAL", "NAAS GENERAL HOSPITAL",
        "PORTLAOISE GENERAL HOSPITAL", "TULLAMORE GENERAL HOSPITAL",
        "ST JAMES HOSPITAL", "TALLAGHT HOSPITAL"
    ],
    "HSE Dublin and South East": [
        "ST VINCENTS HOSPITAL", "ST MICHAELS DUN LAOGHAIRE", "HOLLES STREET MATERNITY HOSPITAL",
        "ST COLUMCILLES HOSPITAL LOUGHLINSTOWN", "ST LUKES HOSPITAL KILKENNY",
        "SOUTH TIPPERARY GENERAL HOSPITAL", "WATERFORD UNIVERSITY HOSPITAL",
        "WEXFORD GENERAL HOSPITAL","SOUTH TIPPERARY GENERAL"
    ],
    "HSE South East": [
        "CORK UNIVERSITY HOSPITAL", "CORK UNIVERSITY MATERNITY HOSP",
        "MERCY UNIVERSITY HOSPITAL", "MALLOW GENERAL HOSPITAL", "BANTRY GENERAL HOSPITAL",
        "SOUTH INFIRMARY-VICTORIA UNIVERSITY HOSPITAL", "ST FINBARRS HOSPITAL CORK", "KERRY UNIVERSITY HOSPITAL"
    ],
    "HSE Mid West": [
        "UNIVERSITY HOSPITAL LIMERICK", "ENNIS GENERAL HOSPITAL", "NENAGH GENERAL HOSPITAL",
        "ST JOHNS HOSPITAL LIMERICK", "LIMERICK MATERNITY HOSPITAL"
    ],
    "HSE West and North West": [
        "LETTERKENNY GENERAL HOSPITAL", "PORTIUNCULA GENERAL HOSPITAL",
        "MAYO GENERAL HOSPITAL", "UNIVERSITY HOSPITAL GALWAY", "ROSCOMMON GENERAL HOSPITAL",
        "SLIGO GENERAL HOSPITAL", "MERLIN PARK HOSPITAL"
    ]
}



In [21]:
def categorize_hospital(hospital_name):
    # Check if hospital_name is a valid string, otherwise return "Hospital_Unknown"
    if not isinstance(hospital_name, str):
        return "Unknown"
    
    for region, hospitals in hse_mapping.items():
        if hospital_name.upper() in hospitals:
            return region
    return "Hospital Unknown"

In [22]:
nas_df["region"] = nas_df["Hospital Attended"].apply(categorize_hospital)

In [23]:
nas_df.head(15)

Unnamed: 0,Randomised ID,DateCallForPerf,Time of Day,PickupTown,EIRCODE ID,DespatchCode,DespCodeDescription,Hospital Attended,region
0,54959224086,2016-01-01,Night,DUNGLOE,,17B01P,Falls - Possibly Dangerous body Area - Public ...,LETTERKENNY GENERAL HOSPITAL,HSE West and North West
1,54959224146,2016-01-01,Night,BEAUMONT DUBLIN 9,,17B01G,Falls - Possibly Dangerous body Area - On the ...,BEAUMONT HOSPITAL,HSE Dublin and North East
2,54959224172,2016-01-01,Night,DROGHEDA,,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA,HSE Dublin and North East
3,54959224182,2016-01-01,Night,BORRISOKANE,,17B01P,Falls - Possibly Dangerous body Area - Public ...,PORTIUNCULA GENERAL HOSPITAL,HSE West and North West
4,54959224201,2016-01-01,Night,WEXFORD,,17B01G,Falls - Possibly Dangerous body Area - On the ...,,Unknown
5,54959224289,2016-01-01,Night,KILLARNEY,,17D04G,Falls - Not Alert - On the ground or floor,KERRY UNIVERSITY HOSPITAL,HSE South East
6,54959224429,2016-01-01,Night,RATHNEENY WEST LAGHEY,,17B01,Falls - Possibly Dangerous body Area -,SLIGO GENERAL HOSPITAL,HSE West and North West
7,54959224658,2016-01-01,Morning,GLENAGEARY,,17B00G,Falls - Bravo Override - On the ground or floor,ST VINCENTS HOSPITAL,HSE Dublin and South East
8,54959224681,2016-01-01,Morning,DUNDALK,,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA,HSE Dublin and North East
9,54959224682,2016-01-01,Morning,CAUSEWAY,,17B01G,Falls - Possibly Dangerous body Area - On the ...,KERRY UNIVERSITY HOSPITAL,HSE South East


In [19]:
##check for NaN/null values
(nas_df.isna().sum()/nas_df.shape[0] * 100).sort_values()

Randomised ID           0.000000
DateCallForPerf         0.000000
Time of Day             0.000000
DespatchCode            0.000000
DespCodeDescription     0.000000
region                  0.000000
PickupTown              0.008376
EIRCODE ID              8.379831
Hospital Attended      15.922065
dtype: float64

In [28]:
unknown_hospitals = nas_df[nas_df["region"] == "Unknown"]["Hospital Attended"].unique()
print(unknown_hospital

[nan]


In [None]:
nas_df.to_csv('/home/paulharford/college/project/processed/nas_clean.csv', index=False)