#### Set styling for plotting

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import seaborn as sns
sns.set_palette('colorblind')
from matplotlib.pyplot import tight_layout
# ##SETTING PARAMS FOR MATPLOTLIB FIGURES
plt.rcParams.update({"figure.figsize": (6, 6),
                 "axes.facecolor": "white",
                 "axes.edgecolor": "black"})
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=sns.color_palette('colorblind'))
##set font size
font = {'family': 'sans-serif',
       'weight': 'normal',
       'size': 14}
plt.rc('font', **font)
# ##PANDAS PLOTTING
pd.plotting.register_matplotlib_converters()

#### Step 1: save environment file

In [2]:
!conda env export > nas_environment.yml

#### Step 2: import modules

In [3]:
import glob
import os
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from tqdm import tqdm
import codecs
import csv
import numpy as np
!pip install openpyxl
%matplotlib inline

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [4]:
data_directory_xl = "/home/paulharford/college/project/project_data/nas/WEATHERED Report CAD 20241217.xlsx"
full_path_xl = os.path.abspath(data_directory_xl)

In [5]:
##Import data from excel into panda's dataframe
nas_df = pd.read_excel(full_path_xl)

In [6]:
nas_df.head(10)

Unnamed: 0,Randomised ID,DateCallForPerf,Time of Day,PickupTown,EIRCODE ID,DespatchCode,DespCodeDescription,Hospital Attended
0,54959224086,2016-01-01,Night,DUNGLOE,,17B01P,Falls - Possibly Dangerous body Area - Public ...,LETTERKENNY GENERAL HOSPITAL
1,54959224146,2016-01-01,Night,BEAUMONT DUBLIN 9,,17B01G,Falls - Possibly Dangerous body Area - On the ...,BEAUMONT HOSPITAL
2,54959224172,2016-01-01,Night,DROGHEDA,,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA
3,54959224182,2016-01-01,Night,BORRISOKANE,,17B01P,Falls - Possibly Dangerous body Area - Public ...,PORTIUNCULA GENERAL HOSPITAL
4,54959224201,2016-01-01,Night,WEXFORD,,17B01G,Falls - Possibly Dangerous body Area - On the ...,
5,54959224289,2016-01-01,Night,KILLARNEY,,17D04G,Falls - Not Alert - On the ground or floor,KERRY UNIVERSITY HOSPITAL
6,54959224429,2016-01-01,Night,RATHNEENY WEST LAGHEY,,17B01,Falls - Possibly Dangerous body Area -,SLIGO GENERAL HOSPITAL
7,54959224658,2016-01-01,Morning,GLENAGEARY,,17B00G,Falls - Bravo Override - On the ground or floor,ST VINCENTS HOSPITAL
8,54959224681,2016-01-01,Morning,DUNDALK,,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA
9,54959224682,2016-01-01,Morning,CAUSEWAY,,17B01G,Falls - Possibly Dangerous body Area - On the ...,KERRY UNIVERSITY HOSPITAL


In [7]:
unique_hospitals = nas_df["Hospital Attended"].unique()
print(unique_hospitals)


['LETTERKENNY GENERAL HOSPITAL' 'BEAUMONT HOSPITAL'
 'LOURDES HOSPITAL DROGHEDA' 'PORTIUNCULA GENERAL HOSPITAL' nan
 'KERRY UNIVERSITY HOSPITAL' 'SLIGO GENERAL HOSPITAL'
 'ST VINCENTS HOSPITAL' 'UNIVERSITY HOSPITAL LIMERICK'
 'SOUTH TIPPERARY GENERAL' 'MULLINGAR REGIONAL HOSPITAL'
 'NAAS GENERAL HOSPITAL' 'CORK UNIVERSITY HOSPITAL'
 'PORTLAOISE GENERAL HOSPITAL' 'CONNOLLY MEMORIAL HOSPITAL'
 'CAVAN GENERAL HOSPITAL' 'MAYO GENERAL HOSPITAL'
 'UNIVERSITY HOSPITAL GALWAY' 'WATERFORD UNIVERSITY HOSPITAL'
 'TULLAMORE GENERAL HOSPITAL' 'WEXFORD GENERAL HOSPITAL'
 'ST JAMES HOSPITAL' 'MERCY UNIVERSITY HOSPITAL'
 'ST MICHAELS DUN LAOGHAIRE' 'OUR LADYS NAVAN' 'TALLAGHT HOSPITAL'
 'MATER HOSPITAL DUBLIN' 'BANTRY GENERAL HOSPITAL'
 'ST LUKES HOSPITAL KILKENNY' 'SACRED HEART HOSPITAL MAYO'
 'ST COLUMCILLES HOSPITAL LOUGHLINSTOWN' 'MALLOW GENERAL HOSPITAL'
 'DAISY HILL HOSPTIAL NEWRY' 'ALTNAGELVIN HOSPITAL'
 'CORK UNIVERSITY MATERNITY HOSP' 'BON SECOURS HOSPITAL CORK'
 'ENNIS GENERAL HOSPITAL' 'ROS

In [8]:
##add regional information
hse_mapping = {
    "HSE Dublin and North East": [
        "BEAUMONT HOSPITAL", "LOURDES HOSPITAL DROGHEDA", "CAVAN GENERAL HOSPITAL",
        "OUR LADYS NAVAN", "MATER HOSPITAL DUBLIN", "CONNOLLY MEMORIAL HOSPITAL",
        "LOUTH COUNTY HOSPITAL"
    ],
    "HSE Dublin and Midlands": [
        "MULLINGAR REGIONAL HOSPITAL", "NAAS GENERAL HOSPITAL",
        "PORTLAOISE GENERAL HOSPITAL", "TULLAMORE GENERAL HOSPITAL",
        "ST JAMES HOSPITAL", "TALLAGHT HOSPITAL"
    ],
    "HSE Dublin and South East": [
        "ST VINCENTS HOSPITAL", "ST MICHAELS DUN LAOGHAIRE", "HOLLES STREET MATERNITY HOSPITAL",
        "ST COLUMCILLES HOSPITAL LOUGHLINSTOWN", "ST LUKES HOSPITAL KILKENNY",
        "SOUTH TIPPERARY GENERAL HOSPITAL", "WATERFORD UNIVERSITY HOSPITAL",
        "WEXFORD GENERAL HOSPITAL","SOUTH TIPPERARY GENERAL"
    ],
    "HSE South East": [
        "CORK UNIVERSITY HOSPITAL", "CORK UNIVERSITY MATERNITY HOSP",
        "MERCY UNIVERSITY HOSPITAL", "MALLOW GENERAL HOSPITAL", "BANTRY GENERAL HOSPITAL",
        "SOUTH INFIRMARY-VICTORIA UNIVERSITY HOSPITAL", "ST FINBARRS HOSPITAL CORK", "KERRY UNIVERSITY HOSPITAL"
    ],
    "HSE Mid West": [
        "UNIVERSITY HOSPITAL LIMERICK", "ENNIS GENERAL HOSPITAL", "NENAGH GENERAL HOSPITAL",
        "ST JOHNS HOSPITAL LIMERICK", "LIMERICK MATERNITY HOSPITAL"
    ],
    "HSE West and North West": [
        "LETTERKENNY GENERAL HOSPITAL", "PORTIUNCULA GENERAL HOSPITAL",
        "MAYO GENERAL HOSPITAL", "UNIVERSITY HOSPITAL GALWAY", "ROSCOMMON GENERAL HOSPITAL",
        "SLIGO GENERAL HOSPITAL", "MERLIN PARK HOSPITAL"
    ]
}



In [9]:
def categorize_hospital(hospital_name):
    # Check if hospital_name is a valid string, otherwise return "Hospital_Unknown"
    if not isinstance(hospital_name, str):
        return "Unknown"
    
    for region, hospitals in hse_mapping.items():
        if hospital_name.upper() in hospitals:
            return region
    return "Hospital Unknown"

In [10]:
nas_df["region"] = nas_df["Hospital Attended"].apply(categorize_hospital)

In [11]:
nas_df.head(15)

Unnamed: 0,Randomised ID,DateCallForPerf,Time of Day,PickupTown,EIRCODE ID,DespatchCode,DespCodeDescription,Hospital Attended,region
0,54959224086,2016-01-01,Night,DUNGLOE,,17B01P,Falls - Possibly Dangerous body Area - Public ...,LETTERKENNY GENERAL HOSPITAL,HSE West and North West
1,54959224146,2016-01-01,Night,BEAUMONT DUBLIN 9,,17B01G,Falls - Possibly Dangerous body Area - On the ...,BEAUMONT HOSPITAL,HSE Dublin and North East
2,54959224172,2016-01-01,Night,DROGHEDA,,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA,HSE Dublin and North East
3,54959224182,2016-01-01,Night,BORRISOKANE,,17B01P,Falls - Possibly Dangerous body Area - Public ...,PORTIUNCULA GENERAL HOSPITAL,HSE West and North West
4,54959224201,2016-01-01,Night,WEXFORD,,17B01G,Falls - Possibly Dangerous body Area - On the ...,,Unknown
5,54959224289,2016-01-01,Night,KILLARNEY,,17D04G,Falls - Not Alert - On the ground or floor,KERRY UNIVERSITY HOSPITAL,HSE South East
6,54959224429,2016-01-01,Night,RATHNEENY WEST LAGHEY,,17B01,Falls - Possibly Dangerous body Area -,SLIGO GENERAL HOSPITAL,HSE West and North West
7,54959224658,2016-01-01,Morning,GLENAGEARY,,17B00G,Falls - Bravo Override - On the ground or floor,ST VINCENTS HOSPITAL,HSE Dublin and South East
8,54959224681,2016-01-01,Morning,DUNDALK,,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA,HSE Dublin and North East
9,54959224682,2016-01-01,Morning,CAUSEWAY,,17B01G,Falls - Possibly Dangerous body Area - On the ...,KERRY UNIVERSITY HOSPITAL,HSE South East


In [12]:
##check for NaN/null values
(nas_df.isna().sum()/nas_df.shape[0] * 100).sort_values()

Randomised ID           0.000000
DateCallForPerf         0.000000
Time of Day             0.000000
DespatchCode            0.000000
DespCodeDescription     0.000000
region                  0.000000
PickupTown              0.008376
EIRCODE ID              8.379831
Hospital Attended      15.922065
dtype: float64

In [13]:
##for the hospital attended that are nana if we have the pickup town we can infer the Hospital attended
## this is not always teh case as patients can be brought to hospitals outsode of their region
## but as 15% of the data is missing its reasonable to assume that the majoring of patiemst went to theire local hospital
unknown_hospitals_df = nas_df[nas_df["region"] == "Unknown"][["Hospital Attended", "PickupTown"]].drop_duplicates()
unknown_hospitals_df.head()



Unnamed: 0,Hospital Attended,PickupTown
4,,WEXFORD
16,,CARROWCUBIC BALLYCASTLE
23,,SHANKILL DUBLIN 18
30,,CELBRIDGE
46,,NAVAN


In [14]:
unique_pickup_towns = unknown_hospitals_df["PickupTown"].unique()

# Print the unique PickupTown values
print(unique_pickup_towns)

['WEXFORD' 'CARROWCUBIC BALLYCASTLE' 'SHANKILL DUBLIN 18' ...
 'MASINASS CREESLOUGH' 'CARRIGLEA DUNGARVAN' 'KILLAREE CHARLEVILLE']


In [15]:
nas_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 155206 entries, 0 to 155205
Data columns (total 9 columns):
 #   Column               Non-Null Count   Dtype         
---  ------               --------------   -----         
 0   Randomised ID        155206 non-null  int64         
 1   DateCallForPerf      155206 non-null  datetime64[ns]
 2   Time of Day          155206 non-null  object        
 3   PickupTown           155193 non-null  object        
 4   EIRCODE ID           142200 non-null  object        
 5   DespatchCode         155206 non-null  object        
 6   DespCodeDescription  155206 non-null  object        
 7   Hospital Attended    130494 non-null  object        
 8   region               155206 non-null  object        
dtypes: datetime64[ns](1), int64(1), object(7)
memory usage: 10.7+ MB


In [16]:
# ignore NaN  only look at str
filtered_pickup_towns = sorted([str(value) for value in unique_pickup_towns if isinstance(value, str)])

# Convert to DataFrame
pickup_towns_df = pd.DataFrame(filtered_pickup_towns, columns=["PickupTown"])

# Save to CSV
pickup_towns_df.to_csv("/home/paulharford/college/project/project_data/nas/unique_pickup_towns.csv", index=False)

print("CSV file saved: unique_pickup_towns.csv")


CSV file saved: unique_pickup_towns.csv


In [17]:
town_to_county = {
    "Achill": "Mayo",
    "Adare": "Limerick",
    "Aghada": "Cork",
    "Ahascragh": "Galway",
    "Aherla": "Cork",
    "Aherlow": "Tipperary",
    "Aille": "Galway",
    "Aillroe": "Clare",
    "Ailt an Chorráin": "Donegal",
    "Aithnín": "Galway",
    "Allen": "Kildare",
    "Allenwood": "Kildare",
    "Allihies": "Cork",
    "An Cheathrú Rua": "Galway",
    "An Clochán Liath": "Donegal",
    "An Fál Carrach": "Donegal",
    "An Móta": "Galway",
    "An Spidéal": "Galway",
    "An Uaimh": "Meath",
    "Anascaul": "Kerry",
    "Annacotty": "Limerick",
    "Annagassan": "Louth",
    "Annagry": "Donegal",
    "Annascaul": "Kerry",
    "Ardagh": "Longford",
    "Ardara": "Donegal",
    "Ardee": "Louth",
    "Ardfert": "Kerry",
    "Ardfinnan": "Tipperary",
    "Ardmore": "Waterford",
    "Ardrahan": "Galway",
    "Ardrossan": "Down",
    "Arklow": "Wicklow",
    "Arless": "Laois",
    "Armagh": "Armagh",
    "Arva": "Cavan",
    "Askeaton": "Limerick",
    "Athenry": "Galway",
    "Athlone": "Westmeath",
    "Athleague": "Roscommon",
    "Athlacca": "Limerick",
    "Attymon": "Galway",
    "Aughrim": "Galway",
    "Aughnacliffe": "Longford",
    "Aughnacloy": "Tyrone",
    "Aughrim": "Wicklow",
    "Avoca": "Wicklow",
    "Bagenalstown": "Carlow",
    "Bailieborough": "Cavan",
    "Balbriggan": "Dublin",
    "Baldoyle": "Dublin",
    "Balla": "Mayo",
    "Ballaghaderreen": "Roscommon",
    "Ballina": "Mayo",
    "Ballinagh": "Cavan",
    "Ballinakill": "Laois",
    "Ballinalee": "Longford",
    "Ballinamore": "Leitrim",
    "Ballinasloe": "Galway",
    "Ballincollig": "Cork",
    "Ballindine": "Mayo",
    "Ballineen": "Cork",
    "Ballingarry": "Limerick",
    "Ballinhassig": "Cork",
    "Ballinlough": "Roscommon",
    "Ballinrobe": "Mayo",
    "Ballinspittle": "Cork",
    "Ballinteer": "Dublin",
    "Ballintra": "Donegal",
    "Ballintubber": "Mayo",
    "Ballisodare": "Sligo",
    "Ballivor": "Meath",
    "Ballybay": "Monaghan",
    "Ballybofey": "Donegal",
    "Ballyboughal": "Dublin",
    "Ballycastle": "Mayo",
    "Ballyconnell": "Cavan",
    "Ballycotton": "Cork",
    "Ballycroy": "Mayo",
    "Ballydehob": "Cork",
    "Ballydesmond": "Cork",
    "Ballyduff": "Waterford",
    "Ballyfarnon": "Roscommon",
    "Ballyferriter": "Kerry",
    "Ballyfoyle": "Kilkenny",
    "Ballygar": "Galway",
    "Ballygarrett": "Wexford",
    "Ballyhaise": "Cavan",
    "Ballyhale": "Kilkenny",
    "Ballyhaunis": "Mayo",
    "Ballyheigue": "Kerry",
    "Ballyjamesduff": "Cavan",
    "Ballylanders": "Limerick",
    "Ballyleague": "Roscommon",
    "Ballylinan": "Laois",
    "Ballylongford": "Kerry",
    "Ballylynan": "Laois",
    "Ballymahon": "Longford",
    "Ballymote": "Sligo",
    "Ballymun": "Dublin",
    "Ballynahinch": "Down",
    "Ballynacally": "Clare",
    "Ballynacargy": "Westmeath",
    "Ballynahinch": "Galway",
    "Ballyragget": "Kilkenny",
    "Ballyroan": "Laois",
    "Ballysadare": "Sligo",
    "Ballyshannon": "Donegal",
    "Ballyvaughan": "Clare",
    "Ballywalter": "Down",
    "Balrothery": "Dublin",
    "Baltinglass": "Wicklow",
    "Bandon": "Cork",
    "Bantry": "Cork",
    "Bawnboy": "Cavan",
    "Bearna": "Galway",
    "Beaufort": "Kerry",
    "Bective": "Meath",
    "Belcoo": "Fermanagh",
    "Belfast": "Antrim",
    "Bellaghy": "Londonderry",
    "Bellahy": "Sligo",
    "Bellavary": "Mayo",
    "Belleek": "Fermanagh",
    "Belmullet": "Mayo",
    "Belturbet": "Cavan",
    "Bennettsbridge": "Kilkenny",
    "Berrings": "Cork",
    "Bettystown": "Meath",
    "Bishopstown": "Cork",
    "Blacklion": "Cavan",
    "Blackrock": "Dublin",
    "Blackwater": "Wexford",
    "Blessington": "Wicklow",
    "Blarney": "Cork",
    "Blennerville": "Kerry",
    "Boherbue": "Cork",
    "Borris": "Carlow",
    "Borris-in-Ossory": "Laois",
    "Borrisokane": "Tipperary",
    "Borrisoleigh": "Tipperary",
    "Bray": "Wicklow",
    "Bruree": "Limerick",
    "Bunclody": "Wexford",
    "Bundoran": "Donegal",
    "Bunmahon": "Waterford",
    "Bunratty": "Clare",
    "Buttevant": "Cork",
    "Cabinteely": "Dublin",
    "Cahersiveen": "Kerry",
    "Cahir": "Tipperary",
    "Cahirciveen": "Kerry",
    "Cappamore": "Limerick",
    "Cappawhite": "Tipperary",
    "Carbury": "Kildare",
    "Carlingford": "Louth",
    "Carlow": "Carlow",
    "Carraroe": "Galway",
    "Carndonagh": "Donegal",
    "Carrick-on-Shannon": "Leitrim",
    "Carrick-on-Suir": "Tipperary",
    "Carrickmacross": "Monaghan",
    "Carrigaline": "Cork",
    "Carrigallen": "Leitrim",
    "Carrigtwohill": "Cork",
    "Cashel": "Tipperary",
    "Castlebar": "Mayo",
    "Castleblayney": "Monaghan",
    "Castlecomer": "Kilkenny",
    "Castleconnell": "Limerick",
    "Castledermot": "Kildare",
    "Castleisland": "Kerry",
    "Castleknock": "Dublin",
    "Castlemartyr": "Cork",
    "Castlerea": "Roscommon",
    "Castletownbere": "Cork",
    "Castletownshend": "Cork",
    "Celbridge": "Kildare",
    "Clane": "Kildare",
    "Clara": "Offaly",
    "Claremorris": "Mayo",
    "Clifden": "Galway",
    "Clogherhead": "Louth",
    "Clonakilty": "Cork",
    "Clondalkin": "Dublin",
    "Clonegal": "Carlow",
    "Clones": "Monaghan",
    "Clonmel": "Tipperary",
    "Clonroche": "Wexford",
    "Coachford": "Cork",
    "Collon": "Louth",
    "Cong": "Mayo",
    "Convoy": "Donegal",
    "Cootehill": "Cavan",
    "Cork": "Cork",
    "Courtown": "Wexford",
    "Crumlin": "Dublin",
    "Dalkey": "Dublin",
    "Dingle": "Kerry",
    "Doolin": "Clare",
    "Drogheda": "Louth",
    "Dromahair": "Leitrim",
    "Dromore": "Down",
    "Dublin": "Dublin",
    "Dun Laoghaire": "Dublin",
    "Dundalk": "Louth",
    "Dunfanaghy": "Donegal",
    "Dungarvan": "Waterford",
    "Dungloe": "Donegal",
    "Dunleer": "Louth",
    "Dunmanway": "Cork",
    "Edenderry": "Offaly",
    "Enfield": "Meath",
    "Ennis": "Clare",
    "Enniscorthy": "Wexford",
    "Ennistymon": "Clare",
    "Fermoy": "Cork",
    "Fethard": "Tipperary",
    "Foxford": "Mayo",
    "Galway": "Galway",
    "Glenties": "Donegal",
    "Gorey": "Wexford",
    "Gort": "Galway",
    "Graiguenamanagh": "Kilkenny",
    "Granard": "Longford",
    "Greystones": "Wicklow",
    "Headford": "Galway",
    "Holywood": "Down",
    "Kilbeggan": "Westmeath",
    "Kilcock": "Kildare",
    "Kilcoole": "Wicklow",
    "Kilcullen": "Kildare",
    "Kildare": "Kildare",
    "Kilkee": "Clare",
    "Kilkenny": "Kilkenny",
    "Killarney": "Kerry",
    "Killorglin": "Kerry",
    "Kilrush": "Clare",
    "Kiltimagh": "Mayo",
    "Kinnegad": "Westmeath",
    "Laois": "Laois",
    "Leixlip": "Kildare",
    "Letterkenny": "Donegal",
    "Lifford": "Donegal",
    "Limerick": "Limerick",
    "Lisburn": "Antrim",
    "Lisdoonvarna": "Clare",
    "Longford": "Longford",
    "Loughrea": "Galway",
    "Lucan": "Dublin",
    "Malahide": "Dublin",
    "Mallow": "Cork",
    "Maynooth": "Kildare",
    "Midleton": "Cork",
    "Mitchelstown": "Cork",
    "Monaghan": "Monaghan",
    "Mullingar": "Westmeath",
    "Naas": "Kildare",
    "Navan": "Meath",
    "Nenagh": "Tipperary",
    "Newbridge": "Kildare",
    "Newcastle West": "Limerick",
    "Newmarket-on-Fergus": "Clare",
    "Newport": "Mayo",
    "Newry": "Down",
    "Newtownards": "Down",
    "Oughterard": "Galway",
    "Portarlington": "Laois",
    "Portlaoise": "Laois",
    "Rathangan": "Kildare",
    "Rathdowney": "Laois",
    "Roscommon": "Roscommon",
    "Rush": "Dublin",
    "Sallins": "Kildare",
    "Shannon": "Clare",
    "Skibbereen": "Cork",
    "Sligo": "Sligo",
    "Strabane": "Tyrone",
    "Terenure": "Dublin",
    "Thurles": "Tipperary",
    "Tipperary": "Tipperary",
    "Trim": "Meath",
    "Tuam": "Galway",
    "Tullamore": "Offaly",
    "Tullow": "Carlow",
    "Waterford": "Waterford",
    "Westport": "Mayo",
    "Wexford": "Wexford",
    "Wicklow": "Wicklow",
    "Youghal": "Cork"
}




In [18]:
# HSE Regions mapping
hse_regions = {
    "HSE Dublin and North East": ["Cavan", "Monaghan", "Louth", "Meath", "Dublin"],
    "HSE Dublin and Midlands": ["Kildare", "Wicklow", "Laois", "Offaly", "Longford", "Westmeath", "Dublin"],
    "HSE Dublin and South East": ["Carlow", "Kilkenny", "Tipperary", "Waterford", "Wexford", "Wicklow", "Dublin"],
    "HSE Mid West": ["Clare", "Limerick", "Tipperary"],
    "HSE South West": ["Cork", "Kerry"],
    "HSE West and North West": ["Donegal", "Sligo", "Leitrim", "Mayo", "Galway", "Roscommon"]
}

# get HSE region from county
def get_hse_region(county):
    for region, counties in hse_regions.items():
        if county in counties:
            return region
    return None

# Fill missing values based on town mapping
def fill_hospital_attended(row):
    if pd.isna(row["Hospital Attended"]):  # Check if Hospital Attended is missing
        town = row["PickupTown"]
        county = town_to_county.get(town, None)
        if county:
            hse_region = get_hse_region(county)
            if hse_region:
                return f"Hospital in {hse_region}"
        return "Unknown"  # If town isn't in mapping
    return row["Hospital Attended"]  # Keep existing value

# Apply function to DataFrame
nas_df["Hospital Attended"] = nas_df.apply(fill_hospital_attended, axis=1)



In [19]:
##check for NaN/null values
(nas_df.isna().sum()/nas_df.shape[0] * 100).sort_values()

Randomised ID          0.000000
DateCallForPerf        0.000000
Time of Day            0.000000
DespatchCode           0.000000
Hospital Attended      0.000000
DespCodeDescription    0.000000
region                 0.000000
PickupTown             0.008376
EIRCODE ID             8.379831
dtype: float64

In [20]:
nas_df.drop(columns=["EIRCODE ID"], inplace=True)



In [21]:
nas_df.drop(columns=["Randomised ID"], inplace=True)

In [22]:
nas_df["PickupTown"] = nas_df["PickupTown"].fillna("Unknown")


In [23]:
nas_df = nas_df.rename(columns={"DateCallForPerf": "date"})

In [24]:
nas_df.head(10)

Unnamed: 0,date,Time of Day,PickupTown,DespatchCode,DespCodeDescription,Hospital Attended,region
0,2016-01-01,Night,DUNGLOE,17B01P,Falls - Possibly Dangerous body Area - Public ...,LETTERKENNY GENERAL HOSPITAL,HSE West and North West
1,2016-01-01,Night,BEAUMONT DUBLIN 9,17B01G,Falls - Possibly Dangerous body Area - On the ...,BEAUMONT HOSPITAL,HSE Dublin and North East
2,2016-01-01,Night,DROGHEDA,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA,HSE Dublin and North East
3,2016-01-01,Night,BORRISOKANE,17B01P,Falls - Possibly Dangerous body Area - Public ...,PORTIUNCULA GENERAL HOSPITAL,HSE West and North West
4,2016-01-01,Night,WEXFORD,17B01G,Falls - Possibly Dangerous body Area - On the ...,Unknown,Unknown
5,2016-01-01,Night,KILLARNEY,17D04G,Falls - Not Alert - On the ground or floor,KERRY UNIVERSITY HOSPITAL,HSE South East
6,2016-01-01,Night,RATHNEENY WEST LAGHEY,17B01,Falls - Possibly Dangerous body Area -,SLIGO GENERAL HOSPITAL,HSE West and North West
7,2016-01-01,Morning,GLENAGEARY,17B00G,Falls - Bravo Override - On the ground or floor,ST VINCENTS HOSPITAL,HSE Dublin and South East
8,2016-01-01,Morning,DUNDALK,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA,HSE Dublin and North East
9,2016-01-01,Morning,CAUSEWAY,17B01G,Falls - Possibly Dangerous body Area - On the ...,KERRY UNIVERSITY HOSPITAL,HSE South East


In [25]:
nas_df.to_csv('/home/paulharford/college/project/project_data/nas/nas_clean.csv', index=False)