#### Set styling for plotting

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import seaborn as sns
sns.set_palette('colorblind')
from matplotlib.pyplot import tight_layout
# Matplotlib figure defaults: 6x6 figures, white axes background with a black
# frame, and seaborn's 'colorblind' palette as the default colour cycle.
plt.rcParams.update({
    "figure.figsize": (6, 6),
    "axes.facecolor": "white",
    "axes.edgecolor": "black",
    "axes.prop_cycle": plt.cycler(color=sns.color_palette('colorblind')),
})

# Global font: size 14, normal weight, sans-serif family.
font = dict(family='sans-serif', weight='normal', size=14)
plt.rc('font', **font)

# Register pandas' date/time converters with matplotlib.
pd.plotting.register_matplotlib_converters()

#### Step 1: save environment file

In [3]:
!conda env export > combined_met_environment.yml

#### Step 2: import modules

In [4]:
import glob
import os
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
from tqdm import tqdm
import codecs
import csv

## INFORMATION

## ODS - Met Éireann old (manual) system for recording adverse weather
Start date: 2012-04-25 12:00:00
End date: 2021-02-17 09:00:00
Only 1654 events in total, which seems low compared to the new RSS/XML system, even with advisories removed.
Will use data from 2013 to when the new system starts in 2018.

## XLSX 
Some of 2023 was missing from the XML-style data, so Met Éireann sent on the full 2023 data in this Excel format.

#### Step 3: import data files (XML archive, ODS and XLSX)

In [5]:
# Input locations, each paired with its absolute form.
# Directory holding the archived XML warning files.
data_directory_xml = "/mnt/hgfs/shared/weather_warnings/archive_warnings/archive"
full_path_xml = os.path.abspath(data_directory_xml)

# ODS spreadsheet of archived warnings (April 2012 - February 2021, per its file name).
data_directory_ods = "/mnt/hgfs/shared/project_data/met_eireann/Archived_Wx_Warnings_25April2012_17February2021.ods"
full_path_ods = os.path.abspath(data_directory_ods)

# Excel workbook with the 2023 national warnings.
data_directory_xl = "/mnt/hgfs/shared/project_data/met_eireann/National warnings from pdfs_2023.xlsx"
full_path_xl = os.path.abspath(data_directory_xl)

In [6]:
# Load the ODS archive with the 'odf' engine, parsing its three timestamp
# columns as datetimes while reading.
df_ods = pd.read_excel(
    io=full_path_ods,
    engine='odf',
    parse_dates=['Issue Time', 'Valid From', 'Valid To'],
)

# Load the 2023 Excel workbook with pandas' defaults.
df_xl = pd.read_excel(io=full_path_xl)

In [7]:
# Preview of the ODS data
print(df_ods.head())

# Column summary; info() writes its own report, so the wrapping print adds a trailing "None"
print(df_ods.info())

# Dimensions of the frame
print(f"Number of rows: {len(df_ods)}")
print(f"Number of columns: {len(df_ods.columns)}")

# Earliest and latest issue time in the ODS data
print(
    "ODS Filtered Date Range:",
    f"Start date: {df_ods['Issue Time'].min()}",
    f"End date: {df_ods['Issue Time'].max()}",
    sep="\n",
)

0 2012-04-25 12:00:00 2012-04-25 12:00:00 2012-04-26 12:00:00         Yellow   
1 2012-06-01 21:00:00 2012-06-02 12:00:00 2012-06-03 21:00:00         Yellow   
2 2012-06-02 14:00:00 2012-06-02 14:00:00 2012-06-03 12:00:00         Orange   
3 2012-06-08 10:00:00 2012-06-08 10:00:00 2012-06-08 23:59:00         Yellow   
4 2012-06-14 20:00:00 2012-06-14 20:00:00 2012-06-16 12:00:00         Yellow   

0            Rain                               Munster and Leinster   
1            Rain                     Munster, Connacht and Leinster   
2            Rain                               Munster and Leinster   
3            Rain                              Connacht and Leinster   
4            Rain  Munster, Leinster, Connacht, Donegal, Monaghan...   

0  Heavy rain moving into Southern coastal counti...     True   True   True   
1  Between 25 and 65 mm of rain possible, (heavie...     True   True   True   
2  Between 25mm & 65mm of rain expected over Lein...     True   True   True   
3

In [8]:
# Preview of the raw 2023 workbook
print(df_xl.head())

# Column summary; info() writes its own report, so the wrapping print adds a trailing "None"
print(df_xl.info())

# Dimensions of the sheet as loaded
print(f"Number of rows: {len(df_xl)}")
print(f"Number of columns: {len(df_xl.columns)}")

# Date-range check left disabled: the preview below shows only "Unnamed: N"
# style headers, i.e. no 'Issue Time' column in the sheet as loaded.
#print("ODS Filtered Date Range:")
#print(f"Start date: {df_xl['Issue Time'].min()}")
#print(f"End date: {df_xl['Issue Time'].max()}")

            Unnamed: 0 Unnamed: 1  \
0                Total        NaN   
1  2023-01-01 00:00:00    Fog/Ice   
2                  NaN        NaN   
3                  NaN        NaN   
4                  NaN        NaN   

                                      unprotect cafo Unnamed: 3 Unnamed: 4  \
0                                                NaN     Yellow     Orange   
1                                     Level: Yellow         244         50   
2                                   Type: Fog / Ice           0          0   
3  Message: Icy stretches along with patches of f...          0          0   
4                          Affected Regions: ireland          0          0   

  Unnamed: 5 Unnamed: 6  Unnamed: 7 Unnamed: 8  Unnamed: 9  ... Unnamed: 33  \
0        Red      Named         NaN    Ireland         NaN  ...          MH   
1        6.5          0         NaN          0         NaN  ...           0   
2          0          0         NaN          0         NaN  ...        

#### Step 4: process XML data 

In [9]:
def severity_to_color(severity):
    """Translate a severity label into its warning colour.

    'Extreme', 'Severe' and 'Moderate' become 'Red', 'Orange' and 'Yellow';
    any other value (including None) yields the placeholder 'notmapped'.
    """
    colour_by_severity = dict(Extreme='Red', Severe='Orange', Moderate='Yellow')
    try:
        return colour_by_severity[severity]
    except KeyError:
        return 'notmapped'

def get_element_text(element, path, namespace):
    """Return the text of the first child of ``element`` matching ``path``.

    ``namespace`` is the prefix-to-URI mapping used to resolve ``path``.
    Returns None when no child matches; a matching element without text
    also yields None, because that is its ``.text`` value.
    """
    node = element.find(path, namespace)
    if node is None:
        return None
    return node.text

def parse_xml(file_path_xml):
    """Read one CAP XML warning file and flatten it into a single row dict.

    Returns None when the file cannot be read or parsed, when it has no
    ``info`` element, when its first ``awareness_type`` parameter contains
    '22' (advisories, which are excluded), or when it has no ``area`` /
    ``geocode`` information. Otherwise returns a dict with the warning's
    issue/validity times, element, text, area description and colour,
    followed by one 0/1 flag per county.
    """
    cap_ns = {'cap': 'urn:oasis:names:tc:emergency:cap:1.2'}

    # FIPS geocode -> county column name. Insertion order determines the
    # order of the county columns in the returned row.
    fips_to_county = {
        'EI01': 'Carlow', 'EI02': 'Cavan', 'EI03': 'Clare', 'EI04': 'Cork', 'EI32': 'Cork City',
        'EI06': 'Donegal', 'EI33': 'Dublin City', 'EI34': 'Dún Laoghaire-Rathdown', 'EI35': 'Fingal',
        'EI10': 'Galway', 'EI36': 'Galway City', 'EI11': 'Kerry', 'EI12': 'Kildare', 'EI13': 'Kilkenny',
        'EI15': 'Laois', 'EI14': 'Leitrim', 'EI42': 'Limerick', 'EI37': 'Limerick City', 'EI18': 'Longford',
        'EI19': 'Louth', 'EI20': 'Mayo', 'EI21': 'Meath', 'EI22': 'Monaghan', 'EI23': 'Offaly',
        'EI24': 'Roscommon', 'EI25': 'Sligo', 'EI39': 'South Dublin', 'EI43': 'Tipperary',
        'EI44': 'Waterford', 'EI29': 'Westmeath', 'EI30': 'Wexford', 'EI31': 'Wicklow'
    }

    try:
        # Undecodable bytes are dropped rather than aborting the read.
        with codecs.open(file_path_xml, 'r', encoding='utf-8', errors='ignore') as handle:
            document = handle.read()
        root = ET.fromstring(document)

        info = root.find('cap:info', cap_ns)
        if info is None:
            return None

        # Skip advisories: look at the first awareness_type parameter only.
        for parameter in info.findall('cap:parameter', cap_ns):
            if get_element_text(parameter, 'cap:valueName', cap_ns) != 'awareness_type':
                continue
            type_value = get_element_text(parameter, 'cap:value', cap_ns)
            if type_value and '22' in type_value:
                return None
            break

        # A warning without geocoded area information cannot be mapped to counties.
        area = info.find('cap:area', cap_ns)
        geocodes = area.findall('cap:geocode', cap_ns) if area is not None else []
        if not geocodes:
            return None

        # Prefer the 'effective' time, falling back to 'onset' when it is absent or empty.
        valid_from = (get_element_text(info, 'cap:effective', cap_ns)
                      or get_element_text(info, 'cap:onset', cap_ns))

        row = {
            'Issue Time': get_element_text(root, 'cap:sent', cap_ns),
            'Valid From': valid_from,
            'Valid To': get_element_text(info, 'cap:expires', cap_ns),
            'Warning Element': get_element_text(info, 'cap:event', cap_ns),
            'Warning Text': get_element_text(info, 'cap:description', cap_ns),
            'WhereToText': get_element_text(area, 'cap:areaDesc', cap_ns),
            'Warning Colour': severity_to_color(get_element_text(info, 'cap:severity', cap_ns)),
        }

        # Every county starts unaffected (0) ...
        row.update(dict.fromkeys(fips_to_county.values(), 0))

        # ... and is switched to 1 when one of the FIPS geocodes names it.
        for geocode in geocodes:
            if get_element_text(geocode, 'cap:valueName', cap_ns) != 'FIPS':
                continue
            county = fips_to_county.get(get_element_text(geocode, 'cap:value', cap_ns))
            if county is not None:
                row[county] = 1

        return row

    except Exception:
        # Any failure (I/O, malformed XML, unexpected structure) is reported as None.
        return None

def process_files(data_directory):
    """Process all XML files in the given directory and extract weather warning data.

    Parameters:
    data_directory (str): Directory that is searched (non-recursively) for ``*.xml`` files.

    Returns:
    tuple: ``(df_xml, error_files, excluded_files)`` where ``df_xml`` is a
    DataFrame with one row per successfully parsed warning (date columns
    converted to UTC datetimes, unparseable values becoming NaT),
    ``error_files`` lists files that are not well-formed XML, and
    ``excluded_files`` lists well-formed files that ``parse_xml`` skipped.
    """
    # glob returns files in arbitrary order; sort so that the row order of the
    # resulting frame (and any order-dependent step built on it) is reproducible.
    file_list = sorted(glob.glob(os.path.join(data_directory, '*.xml')))

    data = []
    error_files = []
    excluded_files = []

    for file in tqdm(file_list, desc="Processing files"):
        row = parse_xml(file)
        if row is not None:
            data.append(row)
            continue

        # parse_xml returns None both for broken files and for skipped
        # warnings, so re-parse the file to tell the two cases apart.
        try:
            with codecs.open(file, 'r', encoding='utf-8', errors='ignore') as f:
                ET.parse(f)
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must still be
            # able to stop a long-running pass over the archive.
            error_files.append(file)
        else:
            excluded_files.append(file)

    # Create DataFrame
    df_xml = pd.DataFrame(data)

    # Convert date fields to datetime; the columns are absent when no file was parsed.
    date_columns = ['Issue Time', 'Valid From', 'Valid To']
    for col in date_columns:
        if col in df_xml.columns:
            df_xml[col] = pd.to_datetime(df_xml[col], utc=True, errors='coerce')

    return df_xml, error_files, excluded_files

def save_error_files(file_list, filename):
    """Write the given file paths to ``filename`` as a one-column CSV.

    The column is named 'file' and the row index is not written.
    """
    listing = pd.DataFrame({'file': file_list})
    listing.to_csv(filename, index=False)


# Parse every XML file in the archive directory. The recorded progress bar
# shows roughly four minutes for 10697 files, so avoid re-running needlessly.
df_xml, error_files, excluded_files = process_files(data_directory_xml)

# Save results
# df_xml.to_csv('weather_warnings.csv', index=False)
# Keep an audit trail of files that failed to parse and files that were skipped.
save_error_files(error_files, 'error_files.csv')
save_error_files(excluded_files, 'excluded_files.csv')

Processing files: 100%|██████████| 10697/10697 [04:04<00:00, 43.82it/s]


In [13]:
# Preview of the parsed XML warnings
print(df_xml.head())

# Column summary; info() writes its own report, so the wrapping print adds a trailing "None"
print(df_xml.info())

# Dimensions of the frame
print(f"Number of rows: {len(df_xml)}")
print(f"Number of columns: {len(df_xml.columns)}")

# Earliest and latest issue time found in the XML archive
print(
    "XML Filtered Date Range:",
    f"Start date: {df_xml['Issue Time'].min()}",
    f"End date: {df_xml['Issue Time'].max()}",
    sep="\n",
)

# Persist the parsed warnings; the consolidation step reads this file back in.
df_xml.to_csv("output.csv", index=False)

                 Issue Time                Valid From  \
0 2018-03-29 19:46:16+00:00 2018-03-22 23:00:01+00:00   
1 2018-03-29 19:50:05+00:00 2018-03-29 19:00:01+00:00   
2 2018-03-29 21:56:16+00:00 2018-03-29 19:00:01+00:00   
3 2018-03-29 23:10:21+00:00 2018-03-29 19:00:01+00:00   
4 2018-03-30 04:15:33+00:00 2018-03-29 19:00:01+00:00   


0  Heavy showery rain continuing overnight with s...     Ireland   
1  Heavy showery rain overnight with some wintry ...     Ireland   
2  Heavy showery rain overnight with some wintry ...     Ireland   
3  Heavy showery rain overnight with some wintry ...     Ireland   
4  Heavy showery rain overnight with some wintry ...     Ireland   

0         Yellow       1      1      1  ...         1       1          1   
1         Yellow       1      1      1  ...         1       1          1   
2         Yellow       1      1      1  ...         1       1          1   
3         Yellow       1      1      1  ...         1       1          1   
4         Y

#### Step 5: removing unneeded data and missing data from ODS 

In [14]:
# The province columns are not needed; HSE regions are added once the datasets are combined.
df_ods = df_ods.drop(columns=['Connacht', 'Leinster', 'Munster', 'Ulster'])

In [15]:
# It is unclear what 'Tipperary SR' represents and the XML data has no such
# column, so fold it into 'Tipperary' (row-wise maximum of the two flags) and
# drop the extra column. assign() returns a new frame, so the previous df_ods
# object is not modified in place.
df_ods = (
    df_ods
    .assign(Tipperary=lambda frame: frame[['Tipperary', 'Tipperary SR']].max(axis=1))
    .drop(columns='Tipperary SR')
)

In [17]:
###XML data has a lot of duplicates for the same event just issued multiple times, so this will consolidate into single events 
# Import pandas for data manipulation
import pandas as pd
from datetime import datetime

def load_weather_warnings(file_path):
    """
    Read a weather-warnings CSV and parse its timestamp columns.

    The 'Issue Time', 'Valid From' and 'Valid To' columns are converted with
    ``pd.to_datetime`` so later grouping and filtering work on datetimes
    instead of strings. All other columns are returned as read.

    Parameters:
    file_path (str): Path to the CSV file containing weather warnings

    Returns:
    pd.DataFrame: The CSV contents with the three timestamp columns parsed
    """
    warnings_frame = pd.read_csv(file_path)

    for timestamp_column in ('Issue Time', 'Valid From', 'Valid To'):
        warnings_frame[timestamp_column] = pd.to_datetime(warnings_frame[timestamp_column])

    return warnings_frame

def consolidate_warnings(df):
    """
    Consolidate weather warnings by grouping reissues of the same event.

    Two rows belong to the same event when they share 'Valid From',
    'Valid To', 'Warning Element', 'Warning Colour' and 'WhereToText'.
    Each event becomes one output row. Rows are ordered by 'Issue Time'
    before grouping, so 'Issue Time' and 'warning_text' come from the
    earliest issue of the event regardless of the input row order.

    The input frame is not modified. Only the columns listed under Returns
    are kept; any other input columns (e.g. per-county flags) are not
    carried over.

    Parameters:
    df (pd.DataFrame): Warnings with the columns 'Issue Time', 'Valid From',
        'Valid To', 'Warning Element', 'Warning Text', 'WhereToText' and
        'Warning Colour'

    Returns:
    pd.DataFrame: One row per event with the columns 'event_key',
        'Issue Time', 'issue_count', 'first_issue', 'last_issue',
        'Valid From', 'Valid To', 'warning_type', 'warning_text',
        'location' and 'warning_colour', ordered by 'event_key'
    """
    # sort_values returns a new frame, so the caller's data stays untouched.
    # mergesort is stable: rows with equal issue times keep their input order.
    ordered = df.sort_values('Issue Time', kind='mergesort')

    # Build the event identifier column-wise instead of with a row-wise apply;
    # the resulting strings are the same, it is just much faster on large frames.
    ordered['event_key'] = [
        f"{valid_from}_{valid_to}_{element}_{colour}_{where}"
        for valid_from, valid_to, element, colour, where in zip(
            ordered['Valid From'],
            ordered['Valid To'],
            ordered['Warning Element'],
            ordered['Warning Colour'],
            ordered['WhereToText'],
        )
    ]

    # One row per event, tracking how often and when the warning was issued
    df_xml_consolidated = ordered.groupby('event_key').agg({
        'Issue Time': ['first', 'count', 'min', 'max'],
        'Valid From': 'first',
        'Valid To': 'first',
        'Warning Element': 'first',
        'Warning Text': 'first',
        'WhereToText': 'first',
        'Warning Colour': 'first'
    }).reset_index()

    # Replace the MultiIndex columns produced by agg() with flat names, in the
    # same order as the aggregation spec above.
    df_xml_consolidated.columns = [
        'event_key',
        'Issue Time',  # earliest issue time of the event (original column name kept)
        'issue_count',
        'first_issue',
        'last_issue',
        'Valid From',  # original column name kept
        'Valid To',    # original column name kept
        'warning_type',
        'warning_text',
        'location',
        'warning_colour'
    ]

    return df_xml_consolidated

def analyze_warnings(df):
    """
    Generate summary statistics about the consolidated weather warnings.

    Parameters:
    df (pd.DataFrame): Consolidated warnings with the columns 'event_key',
        'warning_type', 'location' and 'issue_count' (one row per event)

    Returns:
    dict: Analysis metrics with the keys
        - 'total_warnings': number of warnings issued, i.e. the sum of
          'issue_count' over all events (reissues included)
        - 'unique_events': number of distinct events
        - 'warning_types': mapping of warning type to number of events
        - 'most_reissued': dict with 'warning_type', 'location' and
          'issue_count' of the event issued most often; for an empty frame
          the two labels are None and the count is 0
        - 'avg_issues_per_event': mean 'issue_count' (NaN for an empty frame)
    """
    if len(df) > 0:
        most_reissued = (
            df.nlargest(1, 'issue_count')[['warning_type', 'location', 'issue_count']]
            .to_dict('records')[0]
        )
    else:
        # Nothing to rank; return a placeholder with the same keys so callers
        # that print the summary do not fail on an empty dataset.
        most_reissued = {'warning_type': None, 'location': None, 'issue_count': 0}

    analysis = {
        # The frame holds one row per event, so len(df) would merely repeat
        # 'unique_events'; the number of warnings issued is the sum of counts.
        'total_warnings': int(df['issue_count'].sum()),
        'unique_events': df['event_key'].nunique(),
        'warning_types': df['warning_type'].value_counts().to_dict(),
        'most_reissued': most_reissued,
        'avg_issues_per_event': df['issue_count'].mean()
    }
    return analysis

# Reload the parsed warnings from 'output.csv', collapse reissues into single
# events and summarise the result.
df = load_weather_warnings('output.csv')
df_xml_consolidated = consolidate_warnings(df)
analysis_results = analyze_warnings(df_xml_consolidated)

# Headline numbers
print(
    "\nWeather Warnings Analysis Summary:",
    f"Total warnings issued: {analysis_results['total_warnings']}",
    f"Number of unique events: {analysis_results['unique_events']}",
    "\nWarning types frequency:",
    sep="\n",
)

# One line per warning type
for warning_type, count in analysis_results['warning_types'].items():
    print(f"- {warning_type}: {count}")

# The event that was issued most often, followed by the overall average
print(
    "\nMost reissued warning:",
    f"- Type: {analysis_results['most_reissued']['warning_type']}",
    f"- Location: {analysis_results['most_reissued']['location']}",
    f"- Times issued: {analysis_results['most_reissued']['issue_count']}",
    f"\nAverage issues per event: {analysis_results['avg_issues_per_event']:.2f}",
    sep="\n",
)

# Write the consolidated events to disk
df_xml_consolidated.to_csv('consolidated_weather_warnings.csv', index=False)
print("\nConsolidated warnings have been exported to 'consolidated_weather_warnings.csv'")


Number of unique events: 3027

- Yellow Wind: 643
- Yellow Rain: 593
- Yellow Thunderstorm: 205
- Orange Wind: 130
- Yellow Snow/Ice: 123
- Yellow Low Temperature/Ice: 107
- Orange Rain: 61
- Yellow fog: 50
- Red Wind: 39
- Yellow High Temperature: 35
- Orange Low Temperature/Ice: 24
- Orange Thunderstorm: 24
- Orange Snow/Ice: 22
- Orange High Temperature: 3
- Orange Fog: 3
- Rain: 1

- Location: Carlow, Kildare, Kilkenny, Laois, Longford, Offaly, Westmeath, Cavan, Monaghan, Leitrim, Roscommon, Limerick and Tipperary
- Times issued: 60

Average issues per event: 2.66



#### Step 6: confirm date/time settings, check date ranges and select appropriate ranges to combine

In [19]:
# Give the timestamp columns of both frames the same timezone-aware UTC dtype
# so they can be compared and combined.
# NOTE(review): utc=True treats timezone-naive values as already being in UTC —
# confirm that the ODS times are not local (Irish) time.
datetime_cols = ['Issue Time', 'Valid From', 'Valid To']
df_ods[datetime_cols] = df_ods[datetime_cols].apply(
    lambda column: pd.to_datetime(column, utc=True)
)
df_xml_consolidated[datetime_cols] = df_xml_consolidated[datetime_cols].apply(
    lambda column: pd.to_datetime(column, utc=True)
)

In [21]:
# Issue-time span covered by the ODS archive
print(
    "ODS Filtered Date Range:",
    f"Start date: {df_ods['Issue Time'].min()}",
    f"End date: {df_ods['Issue Time'].max()}",
    sep="\n",
)

# Issue-time span covered by the consolidated XML events
print(
    "XML Filtered Date Range:",
    f"Start date: {df_xml_consolidated['Issue Time'].min()}",
    f"End date: {df_xml_consolidated['Issue Time'].max()}",
    sep="\n",
)

ODS Filtered Date Range:
Start date: 2012-04-25 12:00:00+00:00
End date: 2021-02-17 09:00:00+00:00
XML Filtered Date Range:
Start date: 2017-09-25 04:34:48+00:00
End date: 2023-08-05 11:33:52+00:00


In [24]:
# On examination the XML data has duplicate entries for the same event, e.g. a
# warning may be issued multiple times during one event. The ODS data holds
# individual events only, so use as much ODS data as possible and take the
# consolidated XML events for the period after it.
# TODO: check plots around the changeover date and check for duplicates.

# ODS data: 2013-01-01 up to (not including) 2021-01-01. Half-open intervals
# are used so that no timestamp can fall between the ODS and XML ranges.
df_ods_filtered = df_ods[
    (df_ods['Issue Time'] >= '2013-01-01') &
    (df_ods['Issue Time'] < '2021-01-01')
].copy()

# XML data: 2021-01-01 up to and including 2023-08-05, the last day present in
# the XML archive. .copy() gives independent frames, which avoids
# SettingWithCopyWarning if they are modified later.
df_xml_filtered = df_xml_consolidated[
    (df_xml_consolidated['Issue Time'] >= '2021-01-01') &
    (df_xml_consolidated['Issue Time'] < '2023-08-06')
].copy()


In [26]:
# Show the first ten consolidated XML events that passed the date filter
df_xml_filtered.head(10)

Unnamed: 0,event_key,Issue Time,issue_count,first_issue,last_issue,Valid From,Valid To,warning_type,warning_text,location,warning_colour
905,2021-01-02 14:00:01+00:00_2021-01-03 10:00:01+...,2021-01-02 14:28:11+00:00,4,2021-01-02 14:28:11+00:00,2021-01-03 05:36:45+00:00,2021-01-02 14:00:01+00:00,2021-01-03 10:00:01+00:00,Moderate Low-Temperature warning,Temperatures will fall to minus 3 or 4 degrees...,Ireland,Yellow
906,2021-01-05 13:00:01+00:00_2021-01-06 10:00:01+...,2021-01-05 13:18:24+00:00,1,2021-01-05 13:18:24+00:00,2021-01-05 13:18:24+00:00,2021-01-05 13:00:01+00:00,2021-01-06 10:00:01+00:00,Moderate Low-Temperature warning,Very cold tonight with temperatures falling to...,"Connacht, Cavan, Monaghan, Donegal, Longford a...",Yellow
907,2021-01-05 13:22:38+00:00_2021-01-06 10:00:01+...,2021-01-05 13:25:36+00:00,4,2021-01-05 13:25:36+00:00,2021-01-06 05:27:18+00:00,2021-01-05 13:22:38+00:00,2021-01-06 10:00:01+00:00,Moderate Low-Temperature warning,Very cold tonight with temperatures falling to...,"Connacht, Cavan, Monaghan, Donegal, Longford a...",Yellow
908,2021-01-06 11:00:01+00:00_2021-01-07 10:00:01+...,2021-01-06 11:27:55+00:00,5,2021-01-06 11:27:55+00:00,2021-01-07 05:04:56+00:00,2021-01-06 11:00:01+00:00,2021-01-07 10:00:01+00:00,Moderate Low-Temperature warning,Very cold with minima widely falling to -3 or ...,Ireland,Yellow
909,2021-01-06 11:00:01+00:00_2021-01-07 11:00:01+...,2021-01-06 11:23:35+00:00,6,2021-01-06 11:23:35+00:00,2021-01-07 05:04:56+00:00,2021-01-06 11:00:01+00:00,2021-01-07 11:00:01+00:00,Moderate Snow-ice warning,Snow and ice will lead to treacherous conditio...,Ireland,Yellow
910,2021-01-07 11:00:01+00:00_2021-01-08 11:00:01+...,2021-01-07 11:31:57+00:00,5,2021-01-07 11:31:57+00:00,2021-01-08 05:29:14+00:00,2021-01-07 11:00:01+00:00,2021-01-08 11:00:01+00:00,Moderate Low-Temperature warning,Cold tonight with minimum temperatures of zero...,Ireland,Yellow
911,2021-01-08 11:00:01+00:00_2021-01-09 12:00:01+...,2021-01-08 11:32:31+00:00,5,2021-01-08 11:32:31+00:00,2021-01-09 05:28:16+00:00,2021-01-08 11:00:01+00:00,2021-01-09 12:00:01+00:00,Moderate Low-Temperature warning,Hazardous icy conditions in places with a wide...,Ireland,Yellow
912,2021-01-11 08:00:01+00:00_2021-01-12 12:00:01+...,2021-01-11 08:39:02+00:00,6,2021-01-11 08:39:02+00:00,2021-01-12 05:40:26+00:00,2021-01-11 08:00:01+00:00,2021-01-12 12:00:01+00:00,Moderate Rainfall warning,Persistent and occasionally heavy rainfall may...,Mayo and Sligo,Yellow
913,2021-01-17 15:00:01+00:00_2021-01-19 21:00:01+...,2021-01-17 15:59:30+00:00,1,2021-01-17 15:59:30+00:00,2021-01-17 15:59:30+00:00,2021-01-17 15:00:01+00:00,2021-01-19 21:00:01+00:00,Moderate Rainfall warning,Heavy rainfall will lead to accumulations of 3...,"Connacht, Longford, Louth, Westmeath, Meath, C...",Yellow
914,2021-01-17 16:00:01+00:00_2021-01-19 21:00:01+...,2021-01-17 16:09:32+00:00,12,2021-01-17 16:09:32+00:00,2021-01-19 17:03:11+00:00,2021-01-17 16:00:01+00:00,2021-01-19 21:00:01+00:00,Moderate Rainfall warning,Heavy rainfall will lead to accumulations of 3...,"Connacht, Longford, Louth, Westmeath, Meath, C...",Yellow


In [27]:
# Persist the filtered XML events (row index not written).
# NOTE(review): the file name says "2020", but the filter that builds
# df_xml_filtered starts at 2021-01-01 — confirm before renaming, since a later
# cell reads this exact file name.
df_xml_filtered.to_csv('xml_warnings_2020_2023_08.csv', index=False)

In [34]:
# Reload the exported XML events from disk. read_csv is called without date
# parsing here, so the timestamp columns come back as strings; this also
# rebinds the generic name `df` used earlier in the notebook.
df = pd.read_csv('xml_warnings_2020_2023_08.csv')

# Redundant: `pd` is already in use on the line above.
import pandas as pd

def find_duplicate_warnings(df):
    """
    Print every warning that shares its 'Valid To', 'warning_colour' and
    'location' with at least one other row of ``df``.

    The 'Valid To' and 'Valid From' columns of ``df`` are converted to
    datetimes in place. Matching rows are printed ordered by 'Valid To',
    'warning_type' and 'location'; when there are none a single notice is
    printed instead. Nothing is returned.
    """
    # Normalise the validity columns on the caller's frame
    for validity_column in ('Valid To', 'Valid From'):
        df[validity_column] = pd.to_datetime(df[validity_column])

    # keep=False flags every member of a duplicate group, not only the repeats
    is_repeated = df.duplicated(
        subset=['Valid To', 'warning_colour', 'location'],
        keep=False,
    )
    repeated = df.loc[is_repeated].copy()

    if repeated.empty:
        print("\nNo duplicate warnings were found.")
        return

    print("\nFound the following duplicate warnings:")
    print("=====================================")

    # Ordering by expiry first puts related warnings next to each other
    ordered = repeated.sort_values(['Valid To', 'warning_type', 'location'])

    for _, entry in ordered.iterrows():
        print(f"\nValid To: {entry['Valid To']}")
        print(f"Warning Type: {entry['warning_type']}")
        print(f"Location: {entry['location']}")
        print(f"Warning Text: {entry['warning_text']}")
        print("-" * 50)

# Run the analysis: list events in the reloaded XML subset that share the same
# 'Valid To', warning colour and location (output is printed, nothing returned)
find_duplicate_warnings(df)



Valid To: 2021-01-06 10:00:01+00:00
Location: Connacht, Cavan, Monaghan, Donegal, Longford and Clare
--------------------------------------------------

Valid To: 2021-01-06 10:00:01+00:00
Location: Connacht, Cavan, Monaghan, Donegal, Longford and Clare
--------------------------------------------------

Valid To: 2021-01-19 21:00:01+00:00
Location: Connacht, Longford, Louth, Westmeath, Meath, Cavan and Monaghan
--------------------------------------------------

Valid To: 2021-01-19 21:00:01+00:00
Location: Connacht, Longford, Louth, Westmeath, Meath, Cavan and Monaghan
--------------------------------------------------

Valid To: 2021-02-12 08:00:01+00:00
Location: Munster, Connacht and Leinster
--------------------------------------------------

Valid To: 2021-02-12 08:00:01+00:00
Location: Munster, Connacht and Leinster
--------------------------------------------------

Valid To: 2021-02-23 06:00:00+00:00
Location: Carlow, Kilkenny, Wexford, Munster, Donegal, Galway, Mayo, Sligo