## Initial analysis of weather warnings and IHFD data

In [1]:
import pandas as pd
import matplotlib as plt
import glob
import os
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from tqdm import tqdm
import codecs
import csv

In [2]:
# Locations of the two input CSVs read by this notebook.
# NOTE(review): despite the "data_directory_" prefix these hold *file* paths,
# and both are hard-coded absolute paths - presumably specific to one machine's
# shared-folder mount; confirm before running elsewhere.
data_directory_ihfd = "/mnt/hgfs/shared/project_data/processed/ihfd_clean.csv"
data_directory_weather = "/mnt/hgfs/shared/project_data/processed/WEATHERED_warnings_2014-2023_cleaned.csv"

# Absolute, normalised forms of the same two paths.
full_path_weather = os.path.abspath(data_directory_weather)
full_path_ihfd = os.path.abspath(data_directory_ihfd)


In [3]:
# Load the two input tables into DataFrames.
# Both reads use the absolute paths resolved in the path-configuration cell, so
# the IHFD and weather inputs are located the same way (the weather read
# previously bypassed full_path_weather and used the raw path string).
df_ihfd = pd.read_csv(full_path_ihfd)
df_weather = pd.read_csv(full_path_weather)


In [11]:
# HSE health-region columns of the weather table. These are the columns that
# get unpivoted into a single "region" column when the table is melted.
# Extend this list if the weather file carries further region columns.
weather_region_cols = [
    "HSE Dublin and North East", "HSE Dublin and Midlands",
    "HSE Dublin and South East", "HSE Mid West",
    "HSE South West", "HSE West and North West",
]

# Reshape the weather table from wide (one column per HSE region) to long:
# every original row is repeated once per region column, with the column name
# stored in "region" and that column's value stored in "weather_flag".
warning_id_cols = [
    "date",
    "Issue Time",
    "Valid From",
    "Valid To",
    "Warning Colour",
    "Warning Element",
    "Warning Text",
    "Duration_hours",
    "Year",
]
df_weather_long = pd.melt(
    df_weather,
    id_vars=warning_id_cols,      # carried through unchanged on every output row
    value_vars=weather_region_cols,
    var_name="region",
    value_name="weather_flag",
)



In [12]:
# Give the IHFD region column the same name ("region") that the melted weather
# table uses, so the two frames can be joined on it.
df_ihfd = df_ihfd.rename(columns={"New Health Regions": "region"})


In [26]:
# Parse the first-presentation timestamp, then derive "date_only": the calendar
# date of that timestamp, stored as a pandas datetime so it can be used as a
# join key against the weather table.
admission_ts = pd.to_datetime(df_ihfd["Adm_First_Pres_Hosp_DateTime"])
df_ihfd["Adm_First_Pres_Hosp_DateTime"] = admission_ts
df_ihfd["date_only"] = pd.to_datetime(admission_ts.dt.date)

In [27]:
# Parse the warning start timestamp and derive "date_only" from it, using the
# same calendar-date-as-datetime form as on the IHFD side.
# NOTE(review): the key is taken from "Valid From" only, so a warning is matched
# solely on its start date; if warnings can span several days up to "Valid To",
# later days are not keyed here - confirm this is intended.
valid_from_ts = pd.to_datetime(df_weather_long["Valid From"])
df_weather_long["Valid From"] = valid_from_ts
df_weather_long["date_only"] = pd.to_datetime(valid_from_ts.dt.date)

In [31]:
# ----- Left-join IHFD rows to weather rows on calendar date and region -----
# how="left" keeps every IHFD row; indicator=True adds a "_merge" column that
# records whether a weather row was found ("both") or not ("left_only").
# NOTE(review): df_weather_long holds one row per warning per region regardless
# of the flag value, so a match does not by itself mean the region was under a
# warning. In the outputs saved with this notebook 27,898 rows are "both" but
# only 13,734 rows have weather_flag == 1.
# NOTE(review): if several weather rows share a (date_only, region) pair, the
# matching IHFD row is repeated once per weather row - verify row counts
# against len(df_ihfd) before counting admissions.
df_merged = df_ihfd.merge(
    df_weather_long,
    how="left",
    on=["date_only", "region"],
    indicator=True,
)

In [32]:
# Preview the first five rows of the merged frame.
df_merged.head(n=5)

Unnamed: 0,region,NOCA_TraumaPeriodDay,Adm_First_Pres_Hosp_DateTime,NOCA_FirstPresPeriodDay,NOCA_AgeRange,NOCA_Gender,LOS,Adm_Trauma_TYPE,Adm_Ward_Type,Adm_Pre_Frac_Indoor,...,Issue Time,Valid From,Valid To,Warning Colour,Warning Element,Warning Text,Duration_hours,Year,weather_flag,_merge
0,HSE Mid West,AM,2016-09-06,PM,70-74,Female,12,2.0,1.0,2.0,...,,NaT,,,,,,,,left_only
1,HSE Mid West,AM,2016-09-15,Night,90-94,Female,4,2.0,1.0,2.0,...,,NaT,,,,,,,,left_only
2,HSE Mid West,AM,2016-09-14,AM,75-79,Male,6,2.0,1.0,2.0,...,,NaT,,,,,,,,left_only
3,HSE Mid West,AM,2016-09-04,AM,90-94,Female,16,2.0,1.0,2.0,...,,NaT,,,,,,,,left_only
4,HSE Mid West,AM,2016-08-23,Night,65-69,Male,29,2.0,1.0,2.0,...,,NaT,,,,,,,,left_only


In [33]:
# Tally the merge indicator: how many rows found a weather row vs. none.
df_merged.loc[:, "_merge"].value_counts()

_merge
left_only     28418
both          27898
right_only        0
Name: count, dtype: int64

In [34]:
# Rows with no weather_flag value after the merge (e.g. no matching weather
# row) get a flag of 0.
flag_filled = df_merged["weather_flag"].fillna(value=0)
df_merged["weather_flag"] = flag_filled


In [36]:
# Warning-detail columns brought in from the weather table; missing values in
# these are replaced with 0 below.
columns_to_fill = [
    "Issue Time", "Valid From", "Valid To",
    "Warning Colour", "Warning Element", "Warning Text",
    "Duration_hours", "Year",
]

# NOTE(review): the same integer 0 is written into every listed column,
# including the text columns and the datetime "Valid From" column (its NaT
# values show as 0 in the preview that follows), so those columns end up with
# mixed value types. Consider whether leaving NaN/NaT or using a text
# placeholder would suit downstream use better.
filled_details = df_merged[columns_to_fill].fillna(0)
df_merged[columns_to_fill] = filled_details


In [37]:
# Preview the first ten rows after the missing-value fills.
df_merged.head(n=10)

Unnamed: 0,region,NOCA_TraumaPeriodDay,Adm_First_Pres_Hosp_DateTime,NOCA_FirstPresPeriodDay,NOCA_AgeRange,NOCA_Gender,LOS,Adm_Trauma_TYPE,Adm_Ward_Type,Adm_Pre_Frac_Indoor,...,Issue Time,Valid From,Valid To,Warning Colour,Warning Element,Warning Text,Duration_hours,Year,weather_flag,_merge
0,HSE Mid West,AM,2016-09-06,PM,70-74,Female,12,2.0,1.0,2.0,...,0,0,0,0,0,0,0.0,0.0,0.0,left_only
1,HSE Mid West,AM,2016-09-15,Night,90-94,Female,4,2.0,1.0,2.0,...,0,0,0,0,0,0,0.0,0.0,0.0,left_only
2,HSE Mid West,AM,2016-09-14,AM,75-79,Male,6,2.0,1.0,2.0,...,0,0,0,0,0,0,0.0,0.0,0.0,left_only
3,HSE Mid West,AM,2016-09-04,AM,90-94,Female,16,2.0,1.0,2.0,...,0,0,0,0,0,0,0.0,0.0,0.0,left_only
4,HSE Mid West,AM,2016-08-23,Night,65-69,Male,29,2.0,1.0,2.0,...,0,0,0,0,0,0,0.0,0.0,0.0,left_only
5,HSE Mid West,AM,2016-09-13,AM,90-94,Female,8,2.0,1.0,2.0,...,0,0,0,0,0,0,0.0,0.0,0.0,left_only
6,HSE Mid West,AM,2016-09-13,AM,75-79,Female,8,2.0,1.0,2.0,...,0,0,0,0,0,0,0.0,0.0,0.0,left_only
7,HSE Mid West,AM,2016-09-13,AM,80-84,Female,8,2.0,1.0,2.0,...,0,0,0,0,0,0,0.0,0.0,0.0,left_only
8,HSE Mid West,AM,2016-09-14,AM,80-84,Female,8,2.0,1.0,2.0,...,0,0,0,0,0,0,0.0,0.0,0.0,left_only
9,HSE Mid West,AM,2016-09-21,PM,90-94,Female,3,2.0,1.0,2.0,...,0,0,0,0,0,0,0.0,0.0,0.0,left_only


In [38]:
# Remove the weather "Year" column and the "_merge" indicator from the merged
# frame before it is exported.
# The result is reassigned (no inplace=True) and errors="ignore" is passed so
# that re-running this cell after the columns are already gone is a no-op
# instead of raising KeyError.
df_merged = df_merged.drop(columns=["Year", "_merge"], errors="ignore")


In [39]:
# Distribution of the weather flag across the merged rows.
counts = df_merged.loc[:, "weather_flag"].value_counts()
print(counts)

weather_flag
0.0    42582
1.0    13734
Name: count, dtype: int64


In [40]:
# Write the merged frame to disk as CSV, omitting the DataFrame index.
merged_csv_path = '/mnt/hgfs/shared/project_data/processed/WEATHERED_met-eirenn_ihfd_merged.csv'
df_merged.to_csv(merged_csv_path, index=False)