### Set styling for plotting

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import seaborn as sns
sns.set_palette('colorblind')
from matplotlib.pyplot import tight_layout
# ##SETTING PARAMS FOR MATPLOTLIB FIGURES
plt.rcParams.update({"figure.figsize": (6, 6),
                 "axes.facecolor": "white",
                 "axes.edgecolor": "black"})
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=sns.color_palette('colorblind'))
##set font size
font = {'family': 'sans-serif',
       'weight': 'normal',
       'size': 14}
plt.rc('font', **font)
# ##PANDAS PLOTTING
pd.plotting.register_matplotlib_converters()

### Step 1: save environment file

In [2]:
!conda env export > nas_environment.yml

### Step 2: import modules

In [3]:
import glob
import os
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from tqdm import tqdm
import codecs
import csv
import numpy as np
import pandas as pd
!pip install openpyxl
%matplotlib inline

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


### Step 3: import data files

In [4]:
data_directory_xl = "/home/paulharford/college/project/project_data/nas/WEATHERED Report CAD 20241217.xlsx"
full_path_xl = os.path.abspath(data_directory_xl)

In [5]:
##Import data from excel into panda's dataframe
df_nas = pd.read_excel(full_path_xl)
df_towns = pd.read_csv('ie-towns.csv') 

In [6]:
df_nas.head(10)

Unnamed: 0,Randomised ID,DateCallForPerf,Time of Day,PickupTown,EIRCODE ID,DespatchCode,DespCodeDescription,Hospital Attended
0,54959224086,2016-01-01,Night,DUNGLOE,,17B01P,Falls - Possibly Dangerous body Area - Public ...,LETTERKENNY GENERAL HOSPITAL
1,54959224146,2016-01-01,Night,BEAUMONT DUBLIN 9,,17B01G,Falls - Possibly Dangerous body Area - On the ...,BEAUMONT HOSPITAL
2,54959224172,2016-01-01,Night,DROGHEDA,,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA
3,54959224182,2016-01-01,Night,BORRISOKANE,,17B01P,Falls - Possibly Dangerous body Area - Public ...,PORTIUNCULA GENERAL HOSPITAL
4,54959224201,2016-01-01,Night,WEXFORD,,17B01G,Falls - Possibly Dangerous body Area - On the ...,
5,54959224289,2016-01-01,Night,KILLARNEY,,17D04G,Falls - Not Alert - On the ground or floor,KERRY UNIVERSITY HOSPITAL
6,54959224429,2016-01-01,Night,RATHNEENY WEST LAGHEY,,17B01,Falls - Possibly Dangerous body Area -,SLIGO GENERAL HOSPITAL
7,54959224658,2016-01-01,Morning,GLENAGEARY,,17B00G,Falls - Bravo Override - On the ground or floor,ST VINCENTS HOSPITAL
8,54959224681,2016-01-01,Morning,DUNDALK,,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA
9,54959224682,2016-01-01,Morning,CAUSEWAY,,17B01G,Falls - Possibly Dangerous body Area - On the ...,KERRY UNIVERSITY HOSPITAL


### Step 4.1: Check Hospital Attended column for list of hospitals for mapping to regions

In [7]:
unique_hospitals = df_nas["Hospital Attended"].unique()
print(unique_hospitals)


['LETTERKENNY GENERAL HOSPITAL' 'BEAUMONT HOSPITAL'
 'LOURDES HOSPITAL DROGHEDA' 'PORTIUNCULA GENERAL HOSPITAL' nan
 'KERRY UNIVERSITY HOSPITAL' 'SLIGO GENERAL HOSPITAL'
 'ST VINCENTS HOSPITAL' 'UNIVERSITY HOSPITAL LIMERICK'
 'SOUTH TIPPERARY GENERAL' 'MULLINGAR REGIONAL HOSPITAL'
 'NAAS GENERAL HOSPITAL' 'CORK UNIVERSITY HOSPITAL'
 'PORTLAOISE GENERAL HOSPITAL' 'CONNOLLY MEMORIAL HOSPITAL'
 'CAVAN GENERAL HOSPITAL' 'MAYO GENERAL HOSPITAL'
 'UNIVERSITY HOSPITAL GALWAY' 'WATERFORD UNIVERSITY HOSPITAL'
 'TULLAMORE GENERAL HOSPITAL' 'WEXFORD GENERAL HOSPITAL'
 'ST JAMES HOSPITAL' 'MERCY UNIVERSITY HOSPITAL'
 'ST MICHAELS DUN LAOGHAIRE' 'OUR LADYS NAVAN' 'TALLAGHT HOSPITAL'
 'MATER HOSPITAL DUBLIN' 'BANTRY GENERAL HOSPITAL'
 'ST LUKES HOSPITAL KILKENNY' 'SACRED HEART HOSPITAL MAYO'
 'ST COLUMCILLES HOSPITAL LOUGHLINSTOWN' 'MALLOW GENERAL HOSPITAL'
 'DAISY HILL HOSPTIAL NEWRY' 'ALTNAGELVIN HOSPITAL'
 'CORK UNIVERSITY MATERNITY HOSP' 'BON SECOURS HOSPITAL CORK'
 'ENNIS GENERAL HOSPITAL' 'ROS

In [8]:
##add regional information
hse_mapping = {
    "HSE Dublin and North East": [
        "BEAUMONT HOSPITAL", "LOURDES HOSPITAL DROGHEDA", "CAVAN GENERAL HOSPITAL",
        "OUR LADYS NAVAN", "MATER HOSPITAL DUBLIN", "CONNOLLY MEMORIAL HOSPITAL",
        "LOUTH COUNTY HOSPITAL"
    ],
    "HSE Dublin and Midlands": [
        "MULLINGAR REGIONAL HOSPITAL", "NAAS GENERAL HOSPITAL",
        "PORTLAOISE GENERAL HOSPITAL", "TULLAMORE GENERAL HOSPITAL",
        "ST JAMES HOSPITAL", "TALLAGHT HOSPITAL"
    ],
    "HSE Dublin and South East": [
        "ST VINCENTS HOSPITAL", "ST MICHAELS DUN LAOGHAIRE", "HOLLES STREET MATERNITY HOSPITAL",
        "ST COLUMCILLES HOSPITAL LOUGHLINSTOWN", "ST LUKES HOSPITAL KILKENNY",
        "SOUTH TIPPERARY GENERAL HOSPITAL", "WATERFORD UNIVERSITY HOSPITAL",
        "WEXFORD GENERAL HOSPITAL","SOUTH TIPPERARY GENERAL"
    ],
    "HSE South East": [
        "CORK UNIVERSITY HOSPITAL", "CORK UNIVERSITY MATERNITY HOSP",
        "MERCY UNIVERSITY HOSPITAL", "MALLOW GENERAL HOSPITAL", "BANTRY GENERAL HOSPITAL",
        "SOUTH INFIRMARY-VICTORIA UNIVERSITY HOSPITAL", "ST FINBARRS HOSPITAL CORK", "KERRY UNIVERSITY HOSPITAL"
    ],
    "HSE Mid West": [
        "UNIVERSITY HOSPITAL LIMERICK", "ENNIS GENERAL HOSPITAL", "NENAGH GENERAL HOSPITAL",
        "ST JOHNS HOSPITAL LIMERICK", "LIMERICK MATERNITY HOSPITAL"
    ],
    "HSE West and North West": [
        "LETTERKENNY GENERAL HOSPITAL", "PORTIUNCULA GENERAL HOSPITAL",
        "MAYO GENERAL HOSPITAL", "UNIVERSITY HOSPITAL GALWAY", "ROSCOMMON GENERAL HOSPITAL",
        "SLIGO GENERAL HOSPITAL", "MERLIN PARK HOSPITAL"
    ]
}



### Step 4.2: Map hospitals to regions 

In [9]:
def categorize_hospital(hospital_name):
    # Check if hospital_name is a valid string, otherwise return "Hospital_Unknown"
    if not isinstance(hospital_name, str):
        return "Unknown"
    
    for region, hospitals in hse_mapping.items():
        if hospital_name.upper() in hospitals:
            return region
    return "Hospital Unknown"

In [10]:
df_nas["region"] = df_nas["Hospital Attended"].apply(categorize_hospital)

In [11]:
df_nas.head(15)

Unnamed: 0,Randomised ID,DateCallForPerf,Time of Day,PickupTown,EIRCODE ID,DespatchCode,DespCodeDescription,Hospital Attended,region
0,54959224086,2016-01-01,Night,DUNGLOE,,17B01P,Falls - Possibly Dangerous body Area - Public ...,LETTERKENNY GENERAL HOSPITAL,HSE West and North West
1,54959224146,2016-01-01,Night,BEAUMONT DUBLIN 9,,17B01G,Falls - Possibly Dangerous body Area - On the ...,BEAUMONT HOSPITAL,HSE Dublin and North East
2,54959224172,2016-01-01,Night,DROGHEDA,,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA,HSE Dublin and North East
3,54959224182,2016-01-01,Night,BORRISOKANE,,17B01P,Falls - Possibly Dangerous body Area - Public ...,PORTIUNCULA GENERAL HOSPITAL,HSE West and North West
4,54959224201,2016-01-01,Night,WEXFORD,,17B01G,Falls - Possibly Dangerous body Area - On the ...,,Unknown
5,54959224289,2016-01-01,Night,KILLARNEY,,17D04G,Falls - Not Alert - On the ground or floor,KERRY UNIVERSITY HOSPITAL,HSE South East
6,54959224429,2016-01-01,Night,RATHNEENY WEST LAGHEY,,17B01,Falls - Possibly Dangerous body Area -,SLIGO GENERAL HOSPITAL,HSE West and North West
7,54959224658,2016-01-01,Morning,GLENAGEARY,,17B00G,Falls - Bravo Override - On the ground or floor,ST VINCENTS HOSPITAL,HSE Dublin and South East
8,54959224681,2016-01-01,Morning,DUNDALK,,17B01,Falls - Possibly Dangerous body Area -,LOURDES HOSPITAL DROGHEDA,HSE Dublin and North East
9,54959224682,2016-01-01,Morning,CAUSEWAY,,17B01G,Falls - Possibly Dangerous body Area - On the ...,KERRY UNIVERSITY HOSPITAL,HSE South East


In [12]:
##check for NaN/null values
(df_nas.isna().sum()/df_nas.shape[0] * 100).sort_values()

Randomised ID           0.000000
DateCallForPerf         0.000000
Time of Day             0.000000
DespatchCode            0.000000
DespCodeDescription     0.000000
region                  0.000000
PickupTown              0.008376
EIRCODE ID              8.379831
Hospital Attended      15.922065
dtype: float64

In [13]:
##for the hospital attended that are nana if we have the pickup town we can infer the Hospital attended
## this is not always teh case as patients can be brought to hospitals outsode of their region
## but as 15% of the data is missing its reasonable to assume that the majoring of patiemst went to theire local hospital
unknown_hospitals_df = df_nas[df_nas["region"] == "Unknown"][["Hospital Attended", "PickupTown"]].drop_duplicates()
unknown_hospitals_df.head()



Unnamed: 0,Hospital Attended,PickupTown
4,,WEXFORD
16,,CARROWCUBIC BALLYCASTLE
23,,SHANKILL DUBLIN 18
30,,CELBRIDGE
46,,NAVAN


In [14]:
unique_pickup_towns = unknown_hospitals_df["PickupTown"].unique()

# Print the unique PickupTown values
print(unique_pickup_towns)

['WEXFORD' 'CARROWCUBIC BALLYCASTLE' 'SHANKILL DUBLIN 18' ...
 'MASINASS CREESLOUGH' 'CARRIGLEA DUNGARVAN' 'KILLAREE CHARLEVILLE']


In [15]:
df_nas.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 155206 entries, 0 to 155205
Data columns (total 9 columns):
 #   Column               Non-Null Count   Dtype         
---  ------               --------------   -----         
 0   Randomised ID        155206 non-null  int64         
 1   DateCallForPerf      155206 non-null  datetime64[ns]
 2   Time of Day          155206 non-null  object        
 3   PickupTown           155193 non-null  object        
 4   EIRCODE ID           142200 non-null  object        
 5   DespatchCode         155206 non-null  object        
 6   DespCodeDescription  155206 non-null  object        
 7   Hospital Attended    130494 non-null  object        
 8   region               155206 non-null  object        
dtypes: datetime64[ns](1), int64(1), object(7)
memory usage: 10.7+ MB


### Step 4.3: For Hospital Attended - unknown use the pickup town to determine hospital 

In [16]:
## Using Hospital; Attended to map to region if there is no hospital then map the pickuptown to county and then the region 

# HSE Regions mapping
hse_regions = {
    "HSE Dublin and North East": ["Cavan", "Monaghan", "Louth", "Meath", "Dublin"],
    "HSE Dublin and Midlands": ["Kildare", "Wicklow", "Laois", "Offaly", "Longford", "Westmeath", "Dublin"],
    "HSE Dublin and South East": ["Carlow", "Kilkenny", "Tipperary", "Waterford", "Wexford", "Wicklow", "Dublin"],
    "HSE Mid West": ["Clare", "Limerick", "Tipperary"],
    "HSE South West": ["Cork", "Kerry"],
    "HSE West and North West": ["Donegal", "Sligo", "Leitrim", "Mayo", "Galway", "Roscommon"]
}

hospital_to_region = {
    # HSE Dublin and North East - RCSI Hospitals
    "Beaumont Hospital": "HSE Dublin and North East",
    "Connolly Hospital": "HSE Dublin and North East",
    "CONNOLLY MEMORIAL HOSPITAL": "HSE Dublin and North East",
    "Our Lady of Lourdes Hospital, Drogheda": "HSE Dublin and North East",
    "BEAUMONT HOSPITAL": "HSE Dublin and North East",
    "CONNOLLY HOSPITAL": "HSE Dublin and North East",
    "LOURDES HOSPITAL DROGHEDA": "HSE Dublin and North East",
    "OUR LADYS NAVAN": "HSE Dublin and North East",
    "CAVAN GENERAL HOSPITAL": "HSE Dublin and North East",
    
    # HSE Dublin and Midlands - Dublin Midlands Hospital Group
    "Midland Regional Hospital, Tullamore": "HSE Dublin and Midlands",
    "St James's Hospital": "HSE Dublin and Midlands",
    "Tallaght University Hospital": "HSE Dublin and Midlands",
    "TULLAMORE HOSPITAL": "HSE Dublin and Midlands",
    "TULLAMORE GENERAL HOSPITAL": "HSE Dublin and Midlands",
    "ST JAMES HOSPITAL": "HSE Dublin and Midlands",
    "TALLAGHT HOSPITAL": "HSE Dublin and Midlands",
    "NAAS GENERAL HOSPITAL": "HSE Dublin and Midlands",
    "MULLINGAR REGIONAL HOSPITAL": "HSE Dublin and Midlands",
    "PORTLAOISE GENERAL HOSPITAL": "HSE Dublin and Midlands",
    "TEMPLE ST CHILDRENS HOSPITAL": "HSE Dublin and Midlands",
    "HERMITAGE MEDICAL CENTRE DUBLIN": "HSE Dublin and Midlands",
    
    # HSE Dublin and South East - Ireland East Hospital Group
    "Mater Misericordiae University Hospital": "HSE Dublin and South East",
    "St Vincent's University Hospital": "HSE Dublin and South East",
    "MATER HOSPITAL": "HSE Dublin and South East",
    "ST VINCENTS HOSPITAL": "HSE Dublin and South East",
    "WEXFORD GENERAL HOSPITAL": "HSE Dublin and South East",
    "ST LUKES HOSPITAL KILKENNY": "HSE Dublin and South East",
    "ST MICHAELS DUN LAOGHAIRE": "HSE Dublin and South East",
    "ST COLUMCILLES HOSPITAL LOUGHLINSTOWN": "HSE Dublin and South East",
    
    # HSE Mid West - UL Hospital Group
    "University Hospital Limerick": "HSE Mid West",
    "UNIVERSITY HOSPITAL LIMERICK": "HSE Mid West",
    "LIMERICK REGIONAL HOSPITAL": "HSE Mid West",
    "ENNIS GENERAL HOSPITAL": "HSE Mid West",
    "NENAGH GENERAL HOSPITAL": "HSE Mid West",
    "LIMERICK MATERNITY HOSPITAL": "HSE Mid West",
    "ST JOHNS HOSPITAL LIMERICK": "HSE Mid West",
    "PCI CENTRE LIMERICK": "HSE Mid West",
    
    # HSE South West - South/South West Hospital Group
    "Cork University Hospital": "HSE South West",
    "University Hospital Kerry": "HSE South West",
    "University Hospital Waterford": "HSE South West",
    "CORK UNIVERSITY HOSPITAL": "HSE South West",
    "KERRY GENERAL HOSPITAL": "HSE South West",
    "KERRY UNIVERSITY HOSPITAL": "HSE South West",
    "WATERFORD UNIVERSITY HOSPITAL": "HSE South West",
    "SOUTH TIPPERARY GENERAL": "HSE South West",
    "SOUTH TIPPERARY GENERAL HOSPITAL": "HSE South West",
    "MERCY UNIVERSITY HOSPITAL": "HSE South West",
    "BANTRY GENERAL HOSPITAL": "HSE South West",
    "MALLOW GENERAL HOSPITAL": "HSE South West",
    "CORK UNIVERSITY MATERNITY HOSP": "HSE South West",
    "BONS SECOURS HOSPITAL TRALEE": "HSE South West",
    
    # HSE West and North West - SAOLTA University Hospital Group
    "Letterkenny University Hospital": "HSE West and North West",
    "Mayo University Hospital": "HSE West and North West",
    "Sligo University Hospital": "HSE West and North West",
    "University Hospital Galway": "HSE West and North West",
    "LETTERKENNY GENERAL HOSPITAL": "HSE West and North West",
    "MAYO GENERAL HOSPITAL": "HSE West and North West",
    "SLIGO UNIVERSITY HOSPITAL": "HSE West and North West",
    "SLIGO GENERAL HOSPITAL": "HSE West and North West",
    "GALWAY UNIVERSITY HOSPITAL": "HSE West and North West",
    "PORTIUNCULA GENERAL HOSPITAL": "HSE West and North West",
    "ROSCOMMON GENERAL HOSPITAL": "HSE West and North West",
    "DONEGAL DISTRICT HOSPITAL": "HSE West and North West",
    "ST JOHNS HOSPITAL SLIGO": "HSE West and North West",
    
    # Northern Ireland hospitals (not part of HSE)
    "ALTNAGELVIN HOSPITAL": "Northern Ireland",
    "DAISY HILL HOSPTIAL NEWRY": "Northern Ireland",
    "CRAIGAVON AREA HOSPITAL": "Northern Ireland",
    "SOUTH WEST ACCUTE HOSPITAL": "Northern Ireland"
}

# Create a lookup dictionary from the DataFrame (using lower-case keys for case-insensitive lookup)
town_to_county = {row["name"].lower(): row["county"] for _, row in df_towns.iterrows()}
hospital_to_region_lowercase = {k.lower(): v for k, v in hospital_to_region.items()}

# Function to determine HSE region from a county name
def get_hse_region(county):
    if county is None:
        return None
    county_upper = county.upper()
    for region, counties in hse_regions.items():
        if any(county.upper() == c.upper() for c in counties):
            return region
    return None

# Function to determine region based on Hospital and PickupTown
def determine_region(row):
    # First check if Hospital Attended is available and use the hospital mapping
    if pd.notna(row["Hospital Attended"]) and row["Hospital Attended"].lower() != "unknown":
        hospital_lower = row["Hospital Attended"].lower()
        # Direct lookup
        if hospital_lower in hospital_to_region_lowercase:
            return hospital_to_region_lowercase[hospital_lower]
        # Partial match: if any hospital mapping key is in the hospital name or vice versa
        for hospital, region in hospital_to_region_lowercase.items():
            if hospital in hospital_lower or hospital_lower in hospital:
                return region

    # Fallback: Use the PickupTown to find the county, then get the region
    if pd.notna(row["PickupTown"]):
        town_lower = row["PickupTown"].lower()
        county = None
        # Attempt an exact match first
        if town_lower in town_to_county:
            county = town_to_county[town_lower]
        else:
            # Optionally, perform a substring match if exact match is not found
            for t, c in town_to_county.items():
                if t in town_lower:
                    county = c
                    break

        if county:
            region = get_hse_region(county)
            if region:
                return region

    # If all else fails, return "Unknown"
    return "Unknown"


# Apply the determine_region function to assign regions
df_nas["region"] = df_nas.apply(determine_region, axis=1)

# Optional: Display statistics on the region assignment
region_counts = df_nas["region"].value_counts()
print("Region assignment counts:")
print(region_counts)
total_rows = len(df_nas)
unknown_count = region_counts.get("Unknown", 0)
print(f"Unknown regions: {unknown_count} out of {total_rows} rows ({unknown_count/total_rows*100:.2f}%)")


Region assignment counts:
region
HSE South West               36208
HSE West and North West      32449
HSE Dublin and South East    27302
HSE Dublin and North East    24347
HSE Dublin and Midlands      22645
HSE Mid West                 12198
Northern Ireland                56
Unknown                          1
Name: count, dtype: int64
Unknown regions: 1 out of 155206 rows (0.00%)


In [17]:
# Filter for rows with Unknown region
unknown_regions = df_nas[df_nas['region'] == 'Unknown']

# Check how many have a value in PickupTown
pickup_counts = unknown_regions['PickupTown'].notna().sum()
print(f"Unknown regions with PickupTown value: {pickup_counts} out of {len(unknown_regions)} ({pickup_counts/len(unknown_regions)*100:.2f}%)")

# Get the unique PickupTown values for unknown regions
unique_towns = unknown_regions['PickupTown'].dropna().unique()
print(f"Number of unique towns in unknown regions: {len(unique_towns)}")

# Display the most common towns in unknown regions
town_value_counts = unknown_regions['PickupTown'].value_counts().head(20)
print("\nTop 20 most common towns in unknown regions:")
print(town_value_counts)

print("\nAll towns not being matched:")
all_unique_towns = sorted(unknown_regions['PickupTown'].dropna().unique())
for town in all_unique_towns:
    print(f"- '{town}'")

Unknown regions with PickupTown value: 0 out of 1 (0.00%)
Number of unique towns in unknown regions: 0

Top 20 most common towns in unknown regions:
Series([], Name: count, dtype: int64)

All towns not being matched:


In [18]:
# Filter for rows with Unknown region
unknown_regions = df_nas[df_nas['region'] == 'Unknown']

# Check count of Unknown regions
print(f"Total number of unknown regions: {len(unknown_regions)}")

# Count the most common Hospital Attended values for unknown regions
hospital_counts = unknown_regions['Hospital Attended'].value_counts().head(15)
print("\nMost common Hospital Attended values for unknown regions:")
print(hospital_counts)

Total number of unknown regions: 1

Most common Hospital Attended values for unknown regions:
Series([], Name: count, dtype: int64)


In [19]:
unknown_regions.head(10)

Unnamed: 0,Randomised ID,DateCallForPerf,Time of Day,PickupTown,EIRCODE ID,DespatchCode,DespCodeDescription,Hospital Attended,region
93242,54961626447,2021-07-18,Evening,,,17B04G,Falls - Unknown Status other codes not applica...,,Unknown


In [20]:
df_nas.drop(df_nas[df_nas['region'] == 'Unknown'].index, inplace=True)


### Step 4.4: Check for any furthe rmissing values and drop unneeded columns

In [21]:
df_nas['Hospital Attended'] = df_nas['Hospital Attended'].fillna('unknown_hospital')

In [22]:
df_nas.drop(columns=["EIRCODE ID"], inplace=True)

In [23]:
df_nas.drop(columns=["Randomised ID"], inplace=True)

In [26]:
df_nas['PickupTown'] = df_nas['PickupTown'].fillna('unknown_pickuptown')

In [27]:
##check for NaN/null values
(df_nas.isna().sum()/df_nas.shape[0] * 100).sort_values()

DateCallForPerf        0.0
Time of Day            0.0
PickupTown             0.0
DespatchCode           0.0
DespCodeDescription    0.0
Hospital Attended      0.0
region                 0.0
dtype: float64

In [28]:
# Check some problematic examples
unknown_regions = df_nas[df_nas['region'] == 'Unknown']
print(f"Towns without regions: {unknown_regions['PickupTown'].unique()[:10]}")

Towns without regions: []


In [29]:
df_nas = df_nas.rename(columns={"DateCallForPerf": "date"})

In [30]:
# Check for rows where 'hospital attended' is 'unknown'
unknown_rows = df_nas[df_nas['Hospital Attended'] == 'NaN']

# Display these rows
print(unknown_rows)

Empty DataFrame
Columns: [date, Time of Day, PickupTown, DespatchCode, DespCodeDescription, Hospital Attended, region]
Index: []


### Step 5: Save data to file

In [32]:
df_nas.to_csv('/home/paulharford/college/project/project_data/nas/WEATHERED_nas_clean_v1.csv', index=False)