#### Set styling for plotting

In [48]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import seaborn as sns
sns.set_palette('colorblind')
from matplotlib.pyplot import tight_layout
# ##SETTING PARAMS FOR MATPLOTLIB FIGURES
plt.rcParams.update({"figure.figsize": (6, 6),
                 "axes.facecolor": "white",
                 "axes.edgecolor": "black"})
plt.rcParams['axes.prop_cycle'] = plt.cycler(color=sns.color_palette('colorblind'))
##set font size
font = {'family': 'sans-serif',
       'weight': 'normal',
       'size': 14}
plt.rc('font', **font)
# ##PANDAS PLOTTING
pd.plotting.register_matplotlib_converters()

#### Step 1: save environment file

In [49]:
!conda env export > combined_met_environment.yml

#### Step 2: import modules

In [50]:
import glob
import os
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from tqdm import tqdm
import codecs
import csv

## INFORMATION

##load the 3 processed .csv fiels for combination in a single complete dataset 

#### Step 3: import csv files

In [51]:
data_directory_xml = "/home/paulharford/college/project/project_data/met_eireann/xml_warnings_2018_2023_08.csv"
data_directory_ods = "/home/paulharford/college/project/project_data/met_eireann/ods_warnings_2017_2020.csv"
data_directory_xl = "/home/paulharford/college/project/project_data/met_eireann/xl_warnings_2023.csv"
full_path_xml = os.path.abspath(data_directory_xml)
full_path_ods = os.path.abspath(data_directory_ods)
full_path_xl = os.path.abspath(data_directory_xl)

In [52]:
# Read the 2023 excel file
df_xml = pd.read_csv(full_path_xml)
df_ods = pd.read_csv(full_path_ods)
df_xl = pd.read_csv(full_path_xl)

In [53]:
# First, ensure our datetime conversions are correct
df_xml['Issue Time'] = pd.to_datetime(df_xml['Issue Time'])
df_ods['Issue Time'] = pd.to_datetime(df_ods['Issue Time'])

# Get counts for specific years (2018-2020) for both datasets
xml_year_counts = df_xml['Issue Time'].dt.year.value_counts().sort_index()
ods_year_counts = df_ods['Issue Time'].dt.year.value_counts().sort_index()

# Create a DataFrame to display the years side by side
comparison_df = pd.DataFrame({
    'XML Dataset': xml_year_counts,
    'ODS Dataset': ods_year_counts
})

# Filter 2018-2020
comparison_df = comparison_df.loc[2018:2020]

# Calculate the difference between datasets
comparison_df['Difference'] = comparison_df['XML Dataset'] - comparison_df['ODS Dataset']

# Display the comparison
print("\nComparison of Events (2018-2020):")
print(comparison_df)

# Optional: Create a percentage difference column to see relative changes
comparison_df['Percentage Difference'] = (
    (comparison_df['XML Dataset'] - comparison_df['ODS Dataset']) / 
    comparison_df['ODS Dataset'] * 100
).round(2)

print("\nWith Percentage Difference:")
print(comparison_df)


Comparison of Events (2018-2020):
            XML Dataset  ODS Dataset  Difference
Issue Time                                      
2018              264.0        249.0        15.0
2019              179.0        187.0        -8.0
2020              274.0        277.0        -3.0

With Percentage Difference:
            XML Dataset  ODS Dataset  Difference  Percentage Difference
Issue Time                                                             
2018              264.0        249.0        15.0                   6.02
2019              179.0        187.0        -8.0                  -4.28
2020              274.0        277.0        -3.0                  -1.08


#### Step:6 confirm date/time settings, check date ranges and select appropriate rnages to combine 

In [54]:
##make sure datetime is the same in both dataframes
# Ensure datetime columns are consistently UTC
datetime_cols = ['Issue Time', 'Valid From', 'Valid To']
df_ods[datetime_cols] = df_ods[datetime_cols].apply(pd.to_datetime, utc=True)
df_xml[datetime_cols] = df_xml[datetime_cols].apply(pd.to_datetime, utc=True)
df_xl[datetime_cols] = df_xl[datetime_cols].apply(pd.to_datetime, utc=True)

In [55]:
##Check Columns
df_ods.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1598 entries, 0 to 1597
Data columns (total 33 columns):
 #   Column           Non-Null Count  Dtype              
---  ------           --------------  -----              
 0   Issue Time       1598 non-null   datetime64[ns, UTC]
 1   Valid From       1598 non-null   datetime64[ns, UTC]
 2   Valid To         1598 non-null   datetime64[ns, UTC]
 5   WhereToText      1596 non-null   object             
 7   Clare            1598 non-null   int64              
 8   Cork             1598 non-null   int64              
 9   Kerry            1598 non-null   int64              
 10  Limerick         1598 non-null   int64              
 11  Tipperary        1598 non-null   int64              
 12  Waterford        1598 non-null   int64              
 13  Carlow           1598 non-null   int64              
 14  Dublin           1598 non-null   int64              
 15  Kildare          1598 non-null   int64              
 16  Kilkenny         1

In [56]:
##where to text column not required
df_ods.drop("WhereToText", axis=1, inplace=True)

In [57]:
df_xml.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2016 entries, 0 to 2015
Data columns (total 33 columns):
 #   Column           Non-Null Count  Dtype              
---  ------           --------------  -----              
 0   Issue Time       2016 non-null   datetime64[ns, UTC]
 1   Valid From       2016 non-null   datetime64[ns, UTC]
 2   Valid To         2016 non-null   datetime64[ns, UTC]
 5   WhereToText      2016 non-null   object             
 7   Carlow           2016 non-null   int64              
 8   Cavan            2016 non-null   int64              
 9   Clare            2016 non-null   int64              
 10  Cork             2016 non-null   int64              
 11  Donegal          2016 non-null   int64              
 12  Galway           2016 non-null   int64              
 13  Kerry            2016 non-null   int64              
 14  Kildare          2016 non-null   int64              
 15  Kilkenny         2016 non-null   int64              
 16  Laois            2

In [58]:
df_xml.drop("WhereToText", axis=1, inplace=True)

In [59]:
df_xl.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 36 columns):
 #   Column           Non-Null Count  Dtype              
---  ------           --------------  -----              
 1   Carlow           299 non-null    int64              
 2   Cavan            299 non-null    int64              
 3   Clare            299 non-null    int64              
 4   Cork             299 non-null    int64              
 5   Donegal          299 non-null    int64              
 6   Dublin           299 non-null    int64              
 7   Galway           299 non-null    int64              
 8   Kerry            299 non-null    int64              
 9   Kildare          299 non-null    int64              
 10  Kilkenny         299 non-null    int64              
 11  Laois            299 non-null    int64              
 12  Leitrim          299 non-null    int64              
 13  Limerick         299 non-null    int64              
 14  Longford         299

In [60]:
# Define the desired order
desired_order = [
    "Issue Time", "Valid From", "Valid To", "Warning Colour", "Warning Element",
    "Warning Text", "Clare", "Cork", "Kerry", "Limerick", 
    "Tipperary", "Waterford", "Carlow", "Dublin", "Kildare", "Kilkenny", 
    "Laois", "Longford", "Louth", "Meath", "Offaly", "Westmeath", "Wexford", 
    "Wicklow", "Cavan", "Donegal", "Monaghan", "Galway", "Leitrim", "Mayo", 
    "Roscommon", "Sligo"
]

# This will select (and order) only those columns; note that if your second DataFrame 
# is missing any of these, a KeyError will be raised.
df_xl_ro = df_xl[desired_order]

In [61]:
df_xl_ro.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 32 columns):
 #   Column           Non-Null Count  Dtype              
---  ------           --------------  -----              
 0   Issue Time       286 non-null    datetime64[ns, UTC]
 1   Valid From       287 non-null    datetime64[ns, UTC]
 2   Valid To         285 non-null    datetime64[ns, UTC]
 6   Clare            299 non-null    int64              
 7   Cork             299 non-null    int64              
 8   Kerry            299 non-null    int64              
 9   Limerick         299 non-null    int64              
 10  Tipperary        299 non-null    int64              
 11  Waterford        299 non-null    int64              
 12  Carlow           299 non-null    int64              
 13  Dublin           299 non-null    int64              
 14  Kildare          299 non-null    int64              
 15  Kilkenny         299 non-null    int64              
 16  Laois            299

In [62]:
### date ranges
print("ODS Filtered Date Range:")
print(f"Start date: {df_ods['Issue Time'].min()}")
print(f"End date: {df_ods['Issue Time'].max()}")
print("XML Filtered Date Range:")
print(f"Start date: {df_xml['Issue Time'].min()}")
print(f"End date: {df_xml['Issue Time'].max()}")
print("XML Filtered Date Range:")
print(f"Start date: {df_xl_ro['Issue Time'].min()}")
print(f"End date: {df_xl_ro['Issue Time'].max()}")

ODS Filtered Date Range:
Start date: 2013-02-21 18:00:00+00:00
End date: 2020-12-30 14:00:00+00:00
XML Filtered Date Range:
Start date: 2018-01-01 14:02:14+00:00
End date: 2023-08-04 11:49:46+00:00
XML Filtered Date Range:
Start date: 2023-01-01 13:22:00+00:00
End date: 2023-12-29 19:12:00+00:00


In [63]:
df_ods_filtered = df_ods[
    (df_ods['Issue Time'] >= '2014-01-01') & 
    (df_ods['Issue Time'] <= '2017-12-31 23:59:59')
]
df_xml_filtered = df_xml[
    (df_xml['Issue Time'] >= '2018-01-01') & 
    (df_xml['Issue Time'] <= '2022-12-31 23:59:59')
]
df_xl_filtered = df_xl_ro[
    (df_xl_ro['Issue Time'] >= '2023-01-01') & 
    (df_xl_ro['Issue Time'] <= '2023-12-29 23:59:59')
]

In [64]:
df_combined = pd.concat([df_ods_filtered, df_xml_filtered, df_xl_filtered], ignore_index=True)

In [65]:
print("Combined Filtered Date Range:")
print(f"Start date: {df_combined['Issue Time'].min()}")
print(f"End date: {df_combined['Issue Time'].max()}")

Combined Filtered Date Range:
Start date: 2014-01-02 09:00:00+00:00
End date: 2023-12-29 19:12:00+00:00


In [66]:
df_combined.isnull().sum()

Issue Time         0
Valid From         0
Valid To           2
Clare              0
Cork               0
Kerry              0
Limerick           0
Tipperary          0
Waterford          0
Carlow             0
Dublin             0
Kildare            0
Kilkenny           0
Laois              0
Longford           0
Louth              0
Meath              0
Offaly             0
Westmeath          0
Wexford            0
Wicklow            0
Cavan              0
Donegal            0
Monaghan           0
Galway             0
Leitrim            0
Mayo               0
Roscommon          0
Sligo              0
dtype: int64

In [67]:
color_missing = df_combined[df_combined['Warning Colour'].isna()]
print(f"Number of rows with missing Warning Colour: {len(color_missing)}")
color_missing.head(5)



Unnamed: 0,Issue Time,Valid From,Valid To,Warning Colour,Warning Element,Warning Text,Clare,Cork,Kerry,Limerick,...,Wexford,Wicklow,Cavan,Donegal,Monaghan,Galway,Leitrim,Mayo,Roscommon,Sligo
2559,2023-07-10 16:08:00+00:00,2023-07-10 16:08:00+00:00,2023-07-10 22:00:00+00:00,,Thunderstorm,"Scattered heavy showers are expected, with thu...",0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2627,2023-09-24 08:06:00+00:00,2023-09-24 12:00:00+00:00,2023-09-24 18:00:00+00:00,,Rainfall,Intense rainfall over a relatively short time ...,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2717,2023-12-09 11:44:00+00:00,2023-12-10 14:00:00+00:00,2023-12-10 21:00:00+00:00,,Wind,Storm Fergus will generate very strong and gus...,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2732,2023-12-25 13:01:00+00:00,2023-12-26 20:00:00+00:00,2023-12-27 02:00:00+00:00,,Rainfall,"Thundery and blustery rain, leading to localis...",0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2734,2023-12-26 11:50:00+00:00,2023-12-26 20:00:00+00:00,2023-12-28 00:00:00+00:00,,Rainfall,"Very heavy rain overnight (Tuesday), followed ...",0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [68]:
df_combined = df_combined.dropna(subset=['Warning Colour'])

In [69]:
df_combined = df_combined.dropna(subset=['Valid To'])

In [70]:
df_combined['Warning Text'] = df_combined['Warning Text'].fillna('no_warning_text')

In [71]:
# List unique Warning Elements
unique_warning_elements = df_weather_cat['weather_type'].unique()
print("Unique Warning Elements:")
print(unique_warning_elements)

# Optionally, if you'd like them sorted:
print("Sorted Unique Warning Elements:")
print(sorted(unique_warning_elements))



In [72]:
def normalize_warning(warning_element: str, warning_color: str = None) -> tuple:
    """
    Normalize weather warning types to a consistent format, separating phenomenon and severity.
    
    Args:
        warning_element: The warning text to normalize
        warning_color: Optional color from separate column (Red, Orange, Yellow)
        
    Returns:
        Tuple of (phenomenon, severity)
    """
    text = warning_element.lower().strip()
    
    # Step 1: Determine severity using color column if available
    if warning_color and warning_color.strip():
        if warning_color.lower() in ['red', 'extreme']:
            severity = "Red"
        elif warning_color.lower() in ['orange', 'severe']:
            severity = "Orange"
        elif warning_color.lower() in ['yellow', 'moderate']:
            severity = "Yellow"
        else:
            severity = "Unknown"
    else:
        # Fall back to text analysis if no color provided
        if any(term in text for term in ["extreme", "red"]):
            severity = "Red"
        elif any(term in text for term in ["severe", "orange"]):
            severity = "Orange"
        elif any(term in text for term in ["moderate", "yellow"]):
            severity = "Yellow"
        elif "minor" in text:
            severity = "Minor"
        else:
            severity = "Unknown"
    
    # Step 2: Determine phenomenon with prioritized matching
    # Create a hierarchical priority order to ensure consistent classification
    if "thunder" in text or "tstorm" in text or "thunderstorm" in text:
        phenomenon = "Thunder"
    elif "hail" in text:
        phenomenon = "Hail"
    elif "snow" in text and "ice" in text:
        phenomenon = "Snow_Ice"
    elif "snow" in text:
        phenomenon = "Snow"
    elif "ice" in text and ("low" in text or "temperature" in text):
        phenomenon = "Low-Temperature_Ice"
    elif "ice" in text:
        phenomenon = "Ice"
    elif "fog" in text and "ice" in text:
        phenomenon = "Ice_Fog"
    elif "fog" in text:
        phenomenon = "Fog"
    elif ("rain" in text or "rainfall" in text) and "thunder" in text:
        phenomenon = "Thunder_Rainfall"
    elif "rain" in text or "rainfall" in text:
        phenomenon = "Rainfall"
    elif "high" in text and "temperature" in text:
        phenomenon = "High-Temperature"
    elif "low" in text and "temperature" in text:
        phenomenon = "Low-Temperature"
    elif "small-craft" in text or "wind" in text or "gust" in text or "storm" in text:
        phenomenon = "Wind"
    else:
        phenomenon = "Unknown"
    
    return phenomenon, severity


def update_dataframe_with_normalized_warnings(df, warning_column, color_column=None):
    """
    Update dataframe with normalized warning categories, separating phenomenon and severity.
    
    Args:
        df: Pandas DataFrame containing warnings
        warning_column: Column name containing the warning text
        color_column: Optional column name containing color information
    
    Returns:
        DataFrame with added 'warning_phenomenon' and 'warning_severity' columns
    """
    # Apply the normalization function to get both phenomenon and severity
    if color_column and color_column in df.columns:
        result = df.apply(
            lambda row: normalize_warning(row[warning_column], row[color_column]), 
            axis=1
        )
    else:
        result = df[warning_column].apply(normalize_warning)
    
    # Split the result into separate columns
    df['warning_phenomenon'] = [r[0] for r in result]
    df['warning_severity'] = [r[1] for r in result]
    
    # For compatibility with existing code, you can optionally create a combined column
    df['weather_type'] = df['warning_phenomenon'] + " warning"
    
    return df


In [73]:
df_weather_cat = update_dataframe_with_normalized_warnings(
    df_combined, 
    warning_column='Warning Element',  # Replace with your actual column name
    color_column='Warning Colour'      # Replace with your actual color column name (if available)
)



In [74]:
# List unique Warning Elements
unique_warning_elements = df_weather_cat['weather_type'].unique()
print("Unique Warning Elements:")
print(unique_warning_elements)

# Optionally, if you'd like them sorted:
print("Sorted Unique Warning Elements:")
print(sorted(unique_warning_elements))



In [75]:
minor_rows = df_combined[df_combined["weather_type"].str.contains("minor", case=False, na=False)]

# Show the filtered rows
minor_rows.head()

Unnamed: 0,Issue Time,Valid From,Valid To,Warning Colour,Warning Element,Warning Text,Clare,Cork,Kerry,Limerick,...,Donegal,Monaghan,Galway,Leitrim,Mayo,Roscommon,Sligo,warning_phenomenon,warning_severity,weather_type


In [76]:
df_weather_cat.isnull().sum()

Issue Time            0
Valid From            0
Valid To              0
Clare                 0
Cork                  0
Kerry                 0
Limerick              0
Tipperary             0
Waterford             0
Carlow                0
Dublin                0
Kildare               0
Kilkenny              0
Laois                 0
Longford              0
Louth                 0
Meath                 0
Offaly                0
Westmeath             0
Wexford               0
Wicklow               0
Cavan                 0
Donegal               0
Monaghan              0
Galway                0
Leitrim               0
Mayo                  0
Roscommon             0
Sligo                 0
weather_type          0
dtype: int64

In [77]:
county_cols = [
    "Clare", "Cork", "Kerry", "Limerick", "Tipperary", "Waterford",
    "Carlow", "Dublin", "Kildare", "Kilkenny", "Laois", "Longford",
    "Louth", "Meath", "Offaly", "Westmeath", "Wexford", "Wicklow",
    "Cavan", "Donegal", "Monaghan", "Galway", "Leitrim", "Mayo",
    "Roscommon", "Sligo"
]

rows_all_zero = df_weather_cat[(df_weather_cat[county_cols] == 0).all(axis=1)]
row_count = len(rows_all_zero)
print(row_count)
print(rows_all_zero)

305
                    Issue Time                Valid From  \
14   2014-01-06 21:00:00+00:00 2014-01-06 21:00:00+00:00   
40   2014-02-10 12:00:00+00:00 2014-02-10 22:00:00+00:00   
112  2014-08-09 23:00:00+00:00 2014-08-09 23:00:00+00:00   
315  2015-08-02 14:00:00+00:00 2015-08-03 09:00:00+00:00   
389  2015-12-04 11:00:00+00:00 2015-12-04 11:00:00+00:00   
...                        ...                       ...   
2743 2023-12-28 10:58:00+00:00 2023-12-29 06:00:00+00:00   
2744 2023-12-28 16:47:00+00:00 2023-12-30 02:00:00+00:00   
2745 2023-12-29 11:00:00+00:00 2023-12-30 02:00:00+00:00   
2746 2023-12-29 11:09:00+00:00 2023-12-30 23:00:00+00:00   
2747 2023-12-29 19:12:00+00:00 2023-12-30 05:00:00+00:00   

14   2014-01-07 05:00:00+00:00         Yellow            Wind   
40   2014-02-11 03:00:00+00:00         Yellow            Wind   
112  2014-08-10 15:00:00+00:00         Yellow            Rain   
315  2015-08-03 21:00:00+00:00         Yellow            Wind   
389  2015-12-05

In [78]:
##add length of event feature
# Ensure the 'Valid From' and 'Valid To' columns are in datetime format
df_weather_cat['Valid From'] = pd.to_datetime(df_weather_cat['Valid From'])

df_weather_cat['Valid To'] = pd.to_datetime(df_weather_cat['Valid To'])

# Calculate the duration in hours and create a new column
df_weather_cat['Duration_hours'] = (df_weather_cat['Valid To'] - df_weather_cat['Valid From']).dt.total_seconds() / 3600

# Check the first few rows to verify the new column
print(df_weather_cat[['Valid From', 'Valid To', 'Duration_hours']].head())


                 Valid From                  Valid To  Duration_hours
0 2014-01-02 17:00:00+00:00 2014-01-03 14:00:00+00:00            21.0
1 2014-01-02 17:00:00+00:00 2014-01-03 14:00:00+00:00            21.0
2 2014-01-03 14:00:00+00:00 2014-01-03 19:00:00+00:00             5.0
3 2014-01-04 03:00:00+00:00 2014-01-04 14:00:00+00:00            11.0
4 2014-01-05 08:00:00+00:00 2014-01-05 20:00:00+00:00            12.0


In [79]:
#so for group by region later i'm goign to add a column called region and for any county that has an 1 a region is added to the region column
## then, when multiple regions ar ein play we explode to have each disticnt event in each region (duplicat ethe event if its in multiple regions) 

#import pandas as pd

# Define your HSE region mapping.
hse_regions = {
    'HSE Dublin and North East': ['Cavan', 'Monaghan', 'Louth', 'Meath', 'Dublin'],
    'HSE Dublin and Midlands': ['Kildare', 'Wicklow', 'Laois', 'Offaly', 'Longford', 'Westmeath', 'Dublin'],
    'HSE Dublin and South East': ['Carlow', 'Kilkenny', 'Tipperary', 'Waterford', 'Wexford', 'Wicklow', 'Dublin'],
    'HSE Mid West': ['Clare', 'Limerick', 'Tipperary'],
    'HSE South West': ['Cork', 'Kerry'],
    'HSE West and North West': ['Donegal', 'Sligo', 'Leitrim', 'Mayo', 'Galway', 'Roscommon']
}

# List of all county columns
county_cols = [
    "Clare", "Cork", "Kerry", "Limerick", "Tipperary", "Waterford",
    "Carlow", "Dublin", "Kildare", "Kilkenny", "Laois", "Longford",
    "Louth", "Meath", "Offaly", "Westmeath", "Wexford", "Wicklow",
    "Cavan", "Donegal", "Monaghan", "Galway", "Leitrim", "Mayo",
    "Roscommon", "Sligo"
]

# Function to determine which regions apply to an event
def get_regions(row):
    # Identify counties with an event (value == 1)
    triggered_counties = [county for county in county_cols if row.get(county, 0) == 1]
    
    # If no county is flagged, treat it as an "all Ireland" event by returning all regions.
    if not triggered_counties:
        return list(hse_regions.keys())
    
    # Otherwise, check which regions have any of the triggered counties.
    regions = set()
    for region, counties in hse_regions.items():
        if any(county in triggered_counties for county in counties):
            regions.add(region)
    return list(regions)

# Assuming df_combined is your DataFrame, add a column with the region list.
df_weather_cat['region_list'] = df_weather_cat.apply(get_regions, axis=1)

# Explode the region_list so each event appears once per region.
df_expanded = df_weather_cat.explode('region_list').rename(columns={'region_list': 'region'})

# Optional: Add a column 'county_count' that counts, for that region, how many counties are flagged.
df_expanded['county_count'] = df_expanded.apply(
    lambda row: sum(row[county] for county in hse_regions[row['region']]),
    axis=1
)

df_expanded.head(15)




Unnamed: 0,Issue Time,Valid From,Valid To,Warning Colour,Warning Element,Warning Text,Clare,Cork,Kerry,Limerick,...,Leitrim,Mayo,Roscommon,Sligo,warning_phenomenon,warning_severity,weather_type,Duration_hours,region,county_count
0,2014-01-02 09:00:00+00:00,2014-01-02 17:00:00+00:00,2014-01-03 14:00:00+00:00,Orange,Wind,Becoming stormy this evening and tonight and c...,1,1,1,1,...,1,1,1,1,Wind,Orange,Wind warning,21.0,HSE West and North West,6
0,2014-01-02 09:00:00+00:00,2014-01-02 17:00:00+00:00,2014-01-03 14:00:00+00:00,Orange,Wind,Becoming stormy this evening and tonight and c...,1,1,1,1,...,1,1,1,1,Wind,Orange,Wind warning,21.0,HSE South West,2
0,2014-01-02 09:00:00+00:00,2014-01-02 17:00:00+00:00,2014-01-03 14:00:00+00:00,Orange,Wind,Becoming stormy this evening and tonight and c...,1,1,1,1,...,1,1,1,1,Wind,Orange,Wind warning,21.0,HSE Mid West,2
1,2014-01-02 09:00:00+00:00,2014-01-02 17:00:00+00:00,2014-01-03 14:00:00+00:00,Yellow,Wind,Becoming extremely windy or stormy this evenin...,0,0,0,0,...,0,0,0,0,Wind,Yellow,Wind warning,21.0,HSE Dublin and North East,5
1,2014-01-02 09:00:00+00:00,2014-01-02 17:00:00+00:00,2014-01-03 14:00:00+00:00,Yellow,Wind,Becoming extremely windy or stormy this evenin...,0,0,0,0,...,0,0,0,0,Wind,Yellow,Wind warning,21.0,HSE Dublin and Midlands,7
1,2014-01-02 09:00:00+00:00,2014-01-02 17:00:00+00:00,2014-01-03 14:00:00+00:00,Yellow,Wind,Becoming extremely windy or stormy this evenin...,0,0,0,0,...,0,0,0,0,Wind,Yellow,Wind warning,21.0,HSE Mid West,1
1,2014-01-02 09:00:00+00:00,2014-01-02 17:00:00+00:00,2014-01-03 14:00:00+00:00,Yellow,Wind,Becoming extremely windy or stormy this evenin...,0,0,0,0,...,0,0,0,0,Wind,Yellow,Wind warning,21.0,HSE Dublin and South East,7
2,2014-01-02 09:00:00+00:00,2014-01-03 14:00:00+00:00,2014-01-03 19:00:00+00:00,Yellow,Wind,Blustery for the rest of the afternoon with so...,1,1,1,1,...,1,1,1,1,Wind,Yellow,Wind warning,5.0,HSE West and North West,6
2,2014-01-02 09:00:00+00:00,2014-01-03 14:00:00+00:00,2014-01-03 19:00:00+00:00,Yellow,Wind,Blustery for the rest of the afternoon with so...,1,1,1,1,...,1,1,1,1,Wind,Yellow,Wind warning,5.0,HSE Dublin and Midlands,7
2,2014-01-02 09:00:00+00:00,2014-01-03 14:00:00+00:00,2014-01-03 19:00:00+00:00,Yellow,Wind,Blustery for the rest of the afternoon with so...,1,1,1,1,...,1,1,1,1,Wind,Yellow,Wind warning,5.0,HSE Dublin and North East,5


In [81]:
df_expanded.to_csv('/home/paulharford/college/project/project_data/processed/WEATHERED_warnings_2014-2023_cleaned_v3.csv', index=False)